]>
Commit | Line | Data |
---|---|---|
659e93fc S |
1 | import json |
2 | import random | |
659e93fc S |
3 | |
4 | from .common import InfoExtractor | |
3d2623a8 | 5 | from ..compat import compat_b64decode, compat_str |
6 | from ..networking.exceptions import HTTPError | |
659e93fc | 7 | from ..utils import ( |
29f7c58a | 8 | clean_html, |
659e93fc | 9 | ExtractorError, |
29f7c58a | 10 | js_to_json, |
11 | parse_duration, | |
12 | try_get, | |
13 | unified_timestamp, | |
659e93fc S |
14 | urlencode_postdata, |
15 | urljoin, | |
16 | ) | |
17 | ||
18 | ||
class LinuxAcademyIE(InfoExtractor):
    """Extractor for linuxacademy.com control-panel pages.

    Two URL shapes are accepted (see ``_VALID_URL``):

    * ``/cp/courses/lesson/course/<chapter_id>/lesson/<lesson_id>`` - a single
      lesson, returned as one video whose id is ``<chapter_id>-<lesson_id>``.
    * ``/cp/modules/view/id/<course_id>`` - a module page, returned as a
      playlist with one ``url_transparent`` entry per lesson.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?linuxacademy\.com/cp/
                        (?:
                            courses/lesson/course/(?P<chapter_id>\d+)/lesson/(?P<lesson_id>\d+)|
                            modules/view/id/(?P<course_id>\d+)
                        )
                    '''
    _TESTS = [{
        'url': 'https://linuxacademy.com/cp/courses/lesson/course/7971/lesson/2/module/675',
        'info_dict': {
            'id': '7971-2',
            'ext': 'mp4',
            'title': 'What Is Data Science',
            'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
            'timestamp': int,  # The timestamp and upload date changes
            'upload_date': r're:\d+',
            'duration': 304,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires Linux Academy account credentials',
    }, {
        'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2',
        'only_matching': True,
    }, {
        'url': 'https://linuxacademy.com/cp/modules/view/id/154',
        'info_dict': {
            'id': '154',
            'title': 'AWS Certified Cloud Practitioner',
            'description': 'md5:a68a299ca9bb98d41cca5abc4d4ce22c',
            'duration': 28835,
        },
        'playlist_count': 41,
        'skip': 'Requires Linux Academy account credentials',
    }, {
        'url': 'https://linuxacademy.com/cp/modules/view/id/39',
        'info_dict': {
            'id': '39',
            'title': 'Red Hat Certified Systems Administrator - RHCSA (EX200) Exam Prep (legacy)',
            'description': 'md5:0f1d3369e90c3fb14a79813b863c902f',
            'duration': 89280,
        },
        'playlist_count': 73,
        'skip': 'Requires Linux Academy account credentials',
    }]

    _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
    _ORIGIN_URL = 'https://linuxacademy.com'
    _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx'
    _NETRC_MACHINE = 'linuxacademy'

    def _perform_login(self, username, password):
        """Log in with *username* / *password* and validate the resulting token.

        The flow is: fetch the authorize page, decode the base64 JSON blob it
        embeds, POST the credentials as JSON, replay the hidden form fields of
        the response to the callback endpoint, then hand the access token to
        the site's token-validation URL.

        Raises:
            ExtractorError: with the service's own message (``expected=True``)
                when the login endpoint answers HTTP 401 with a JSON object
                carrying ``description`` or ``code``; any other download
                failure is re-raised unchanged.
        """
        def random_string():
            # 32 random characters used for the ``state`` and ``nonce`` query
            # parameters. The alphabet is reproduced exactly as found
            # (NOTE(review): it has no 'W' - presumably mirrors the login
            # client's own charset; confirm before "fixing").
            return ''.join(random.choices(
                '0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~', k=32))

        webpage, urlh = self._download_webpage_handle(
            self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
                'client_id': self._CLIENT_ID,
                'response_type': 'token id_token',
                'response_mode': 'web_message',
                'redirect_uri': self._ORIGIN_URL,
                'scope': 'openid email user_impersonation profile',
                'audience': self._ORIGIN_URL,
                'state': random_string(),
                'nonce': random_string(),
            })

        # The authorize page passes a base64 string to atob(); once decoded it
        # is JSON whose 'extraParams' member seeds the login request body.
        login_data = self._parse_json(
            self._search_regex(
                r'atob\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
                'login info', group='value'), None,
            transform_source=lambda x: compat_b64decode(x).decode('utf-8'),
        )['extraParams']

        login_data.update({
            'client_id': self._CLIENT_ID,
            'redirect_uri': self._ORIGIN_URL,
            'tenant': 'lacausers',
            'connection': 'Username-Password-ACG-Proxy',
            'username': username,
            'password': password,
            'sso': 'true',
        })

        # Final URL of the authorize request (after any redirects); sent as
        # the Referer of the two follow-up requests.
        login_state_url = urlh.url

        try:
            login_page = self._download_webpage(
                'https://login.linuxacademy.com/usernamepassword/login', None,
                'Downloading login page', data=json.dumps(login_data).encode(),
                headers={
                    'Content-Type': 'application/json',
                    'Origin': 'https://login.linuxacademy.com',
                    'Referer': login_state_url,
                })
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                # Parse leniently: a 401 body that is not JSON, is not an
                # object, or lacks both keys must not replace the real login
                # failure with an unrelated parse error / KeyError.
                error = self._parse_json(
                    e.cause.response.read(), None, fatal=False)
                message = None
                if isinstance(error, dict):
                    message = error.get('description') or error.get('code')
                if message:
                    raise ExtractorError(
                        '%s said: %s' % (self.IE_NAME, message), expected=True)
            # No usable message (or not a 401): surface the original error.
            raise

        callback_page, urlh = self._download_webpage_handle(
            'https://login.linuxacademy.com/login/callback', None,
            'Downloading callback page',
            data=urlencode_postdata(self._hidden_inputs(login_page)),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'Origin': 'https://login.linuxacademy.com',
                'Referer': login_state_url,
            })

        # Prefer a token carried in the final callback URL; otherwise fall
        # back to the authorizationResponse object embedded in the page.
        access_token = self._search_regex(
            r'access_token=([^=&]+)', urlh.url,
            'access token', default=None)
        if not access_token:
            access_token = self._parse_json(
                self._search_regex(
                    r'authorizationResponse\s*=\s*({.+?})\s*;', callback_page,
                    'authorization response'), None,
                transform_source=js_to_json)['response']['access_token']

        self._download_webpage(
            'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
            % access_token, None, 'Downloading token validation page')

    def _real_extract(self, url):
        """Return a playlist for module URLs or a single video for lesson URLs."""
        mobj = self._match_valid_url(url)
        chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id')
        item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id)

        webpage = self._download_webpage(url, item_id)

        # course path
        if course_id:
            return self._extract_course_playlist(url, webpage, course_id)

        # single video path
        return self._extract_single_lesson(webpage, item_id)

    @staticmethod
    def _type_field(item, key):
        """Return ``item['type'][key]`` lower-cased, or ``''`` if try_get yields nothing."""
        return (try_get(item, lambda x: x['type'][key], compat_str) or '').lower()

    def _extract_course_playlist(self, url, webpage, course_id):
        """Build the playlist result for a module page.

        Reads the ``window.module`` object from *webpage* and walks its
        ``items``: a "section" item starts a new chapter, a "lesson" item
        becomes a ``url_transparent`` entry, anything else is skipped.
        """
        module = self._parse_json(
            self._search_regex(
                # A {...} object terminated by ';' in which double-quoted
                # strings are consumed as units, so a '};' inside a string
                # does not end the match early.
                r'window\.module\s*=\s*({(?:(?!};)[^"]|"([^"]|\\")*")+})\s*;', webpage, 'module'),
            course_id)
        entries = []
        chapter_number = None
        chapter = None
        chapter_id = None
        for item in module['items']:
            if not isinstance(item, dict):
                continue

            type_fields = (
                self._type_field(item, 'name'),
                self._type_field(item, 'slug'),
            )
            # Move to next module section
            if 'section' in type_fields:
                chapter = item.get('course_name')
                chapter_id = item.get('course_module')
                chapter_number = 1 if not chapter_number else chapter_number + 1
                continue
            # Skip non-lessons
            if 'lesson' not in type_fields:
                continue
            lesson_url = urljoin(url, item.get('url'))
            if not lesson_url:
                continue
            title = item.get('title') or item.get('lesson_name')
            description = item.get('md_desc') or clean_html(item.get('description')) or clean_html(item.get('text'))
            entries.append({
                '_type': 'url_transparent',
                'url': lesson_url,
                'ie_key': LinuxAcademyIE.ie_key(),
                'title': title,
                'description': description,
                'timestamp': unified_timestamp(item.get('date')) or unified_timestamp(item.get('created_on')),
                'duration': parse_duration(item.get('duration')),
                'chapter': chapter,
                'chapter_id': chapter_id,
                'chapter_number': chapter_number,
            })
        return {
            '_type': 'playlist',
            'entries': entries,
            'id': course_id,
            'title': module.get('title'),
            'description': module.get('md_desc') or clean_html(module.get('desc')),
            'duration': parse_duration(module.get('duration')),
        }

    def _extract_single_lesson(self, webpage, item_id):
        """Build the info dict for one lesson page.

        Formats come from the first ``player.playlist`` entry's ``file`` URL
        (HLS). Metadata is taken from the ``window.lesson`` / ``player.lesson``
        object when present; the title falls back to markup in the page.
        """
        m3u8_url = self._parse_json(
            self._search_regex(
                r'player\.playlist\s*=\s*(\[.+?\])\s*;', webpage, 'playlist'),
            item_id)[0]['file']
        formats = self._extract_m3u8_formats(
            m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')
        info = {
            'id': item_id,
            'formats': formats,
        }
        # Lesson metadata is optional: a missing object defaults to '{}' and a
        # parse failure is non-fatal.
        lesson = self._parse_json(
            self._search_regex(
                (r'window\.lesson\s*=\s*({.+?})\s*;',
                 r'player\.lesson\s*=\s*({.+?})\s*;'),
                webpage, 'lesson', default='{}'), item_id, fatal=False)
        if lesson:
            info.update({
                'title': lesson.get('lesson_name'),
                'description': lesson.get('md_desc') or clean_html(lesson.get('desc')),
                'timestamp': unified_timestamp(lesson.get('date')) or unified_timestamp(lesson.get('created_on')),
                'duration': parse_duration(lesson.get('duration')),
            })
        if not info.get('title'):
            info['title'] = self._search_regex(
                (r'>Lecture\s*:\s*(?P<value>[^<]+)',
                 r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
                'title', group='value')
        return info