]>
Commit | Line | Data |
---|---|---|
659e93fc S |
1 | from __future__ import unicode_literals |
2 | ||
3 | import json | |
4 | import random | |
659e93fc S |
5 | |
6 | from .common import InfoExtractor | |
7 | from ..compat import ( | |
8 | compat_b64decode, | |
9 | compat_HTTPError, | |
29f7c58a | 10 | compat_str, |
659e93fc S |
11 | ) |
12 | from ..utils import ( | |
29f7c58a | 13 | clean_html, |
659e93fc | 14 | ExtractorError, |
29f7c58a | 15 | js_to_json, |
16 | parse_duration, | |
17 | try_get, | |
18 | unified_timestamp, | |
659e93fc S |
19 | urlencode_postdata, |
20 | urljoin, | |
21 | ) | |
22 | ||
23 | ||
class LinuxAcademyIE(InfoExtractor):
    """Extractor for linuxacademy.com lesson pages and course (module) pages.

    A ``courses/lesson/course/<chapter>/lesson/<lesson>`` URL is extracted as
    a single HLS video; a ``modules/view/id/<course>`` URL is extracted as a
    playlist whose entries point back at the individual lesson URLs.
    """

    # Exactly one of (chapter_id + lesson_id) or course_id is captured,
    # depending on which alternative of the pattern matched.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?linuxacademy\.com/cp/
                        (?:
                            courses/lesson/course/(?P<chapter_id>\d+)/lesson/(?P<lesson_id>\d+)|
                            modules/view/id/(?P<course_id>\d+)
                        )
                    '''
    _TESTS = [{
        'url': 'https://linuxacademy.com/cp/courses/lesson/course/7971/lesson/2/module/675',
        'info_dict': {
            'id': '7971-2',
            'ext': 'mp4',
            'title': 'What Is Data Science',
            'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
            'timestamp': int,  # The timestamp and upload date changes
            'upload_date': r're:\d+',
            'duration': 304,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires Linux Academy account credentials',
    }, {
        'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2',
        'only_matching': True,
    }, {
        'url': 'https://linuxacademy.com/cp/modules/view/id/154',
        'info_dict': {
            'id': '154',
            'title': 'AWS Certified Cloud Practitioner',
            'description': 'md5:a68a299ca9bb98d41cca5abc4d4ce22c',
            'duration': 28835,
        },
        'playlist_count': 41,
        'skip': 'Requires Linux Academy account credentials',
    }, {
        'url': 'https://linuxacademy.com/cp/modules/view/id/39',
        'info_dict': {
            'id': '39',
            'title': 'Red Hat Certified Systems Administrator - RHCSA (EX200) Exam Prep (legacy)',
            'description': 'md5:0f1d3369e90c3fb14a79813b863c902f',
            'duration': 89280,
        },
        'playlist_count': 73,
        'skip': 'Requires Linux Academy account credentials',
    }]

    _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
    _ORIGIN_URL = 'https://linuxacademy.com'
    _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx'
    _NETRC_MACHINE = 'linuxacademy'

    def _perform_login(self, username, password):
        """Log in to Linux Academy with the given credentials.

        The flow is: download the authorize page, decode the base64 login
        parameters embedded in it, POST the credentials as JSON to the
        ``usernamepassword/login`` endpoint, re-POST the hidden form inputs
        of the response to ``login/callback``, and finally hit the
        ``tokenValidateLogin`` URL with the obtained access token.

        Raises:
            ExtractorError: with ``expected=True`` when the login endpoint
                answers HTTP 401; any other download/parse failure is
                propagated unchanged.
        """
        def random_string():
            # 32 random characters used for the 'state' and 'nonce'
            # query parameters of the authorize request
            return ''.join([
                random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~')
                for _ in range(32)])

        webpage, urlh = self._download_webpage_handle(
            self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
                'client_id': self._CLIENT_ID,
                'response_type': 'token id_token',
                'response_mode': 'web_message',
                'redirect_uri': self._ORIGIN_URL,
                'scope': 'openid email user_impersonation profile',
                'audience': self._ORIGIN_URL,
                'state': random_string(),
                'nonce': random_string(),
            })

        # The authorize page carries a base64-encoded JSON blob inside an
        # atob(...) call; its 'extraParams' member seeds the login payload
        login_data = self._parse_json(
            self._search_regex(
                r'atob\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
                'login info', group='value'), None,
            transform_source=lambda x: compat_b64decode(x).decode('utf-8')
        )['extraParams']

        login_data.update({
            'client_id': self._CLIENT_ID,
            'redirect_uri': self._ORIGIN_URL,
            'tenant': 'lacausers',
            'connection': 'Username-Password-ACG-Proxy',
            'username': username,
            'password': password,
            'sso': 'true',
        })

        # Final URL of the authorize request, sent as Referer below
        login_state_url = urlh.geturl()

        try:
            login_page = self._download_webpage(
                'https://login.linuxacademy.com/usernamepassword/login', None,
                'Downloading login page', data=json.dumps(login_data).encode(),
                headers={
                    'Content-Type': 'application/json',
                    'Origin': 'https://login.linuxacademy.com',
                    'Referer': login_state_url,
                })
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                # The 401 body is read as JSON with a 'description' and/or
                # 'code' member. Tolerate an empty, malformed or differently
                # shaped body so that the user still gets a clean, expected
                # login error instead of a parse error or KeyError.
                error = self._parse_json(
                    e.cause.read().decode('utf-8', 'replace'), None,
                    fatal=False)
                if not isinstance(error, dict):
                    error = {}
                message = (
                    error.get('description') or error.get('code')
                    or compat_str(e.cause))
                raise ExtractorError(
                    '%s said: %s' % (self.IE_NAME, message), expected=True)
            raise

        callback_page, urlh = self._download_webpage_handle(
            'https://login.linuxacademy.com/login/callback', None,
            'Downloading callback page',
            data=urlencode_postdata(self._hidden_inputs(login_page)),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'Origin': 'https://login.linuxacademy.com',
                'Referer': login_state_url,
            })

        # Prefer the token from the final callback URL; otherwise fall back
        # to the authorizationResponse object embedded in the callback page
        access_token = self._search_regex(
            r'access_token=([^=&]+)', urlh.geturl(),
            'access token', default=None)
        if not access_token:
            access_token = self._parse_json(
                self._search_regex(
                    r'authorizationResponse\s*=\s*({.+?})\s*;', callback_page,
                    'authorization response'), None,
                transform_source=js_to_json)['response']['access_token']

        self._download_webpage(
            'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
            % access_token, None, 'Downloading token validation page')

    def _real_extract(self, url):
        """Extract a course playlist or a single lesson video from ``url``.

        Returns:
            For a course URL, a ``playlist`` dict whose entries are
            ``url_transparent`` references to the lesson URLs; for a lesson
            URL, an info dict with the HLS formats and lesson metadata.
        """
        mobj = self._match_valid_url(url)
        chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id')
        # Courses are identified by their own id, lessons by '<chapter>-<lesson>'
        item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id)

        webpage = self._download_webpage(url, item_id)

        # course path
        if course_id:
            module = self._parse_json(
                self._search_regex(
                    r'window\.module\s*=\s*({(?:(?!};)[^"]|"([^"]|\\")*")+})\s*;', webpage, 'module'),
                item_id)
            entries = []
            # Chapter state is carried forward from the most recent
            # 'section' item to the lessons that follow it
            chapter_number = None
            chapter = None
            chapter_id = None
            for item in module['items']:
                if not isinstance(item, dict):
                    continue

                def type_field(key):
                    # Lower-cased item['type'][key], or '' when absent / not a string
                    return (try_get(item, lambda x: x['type'][key], compat_str) or '').lower()
                type_fields = (type_field('name'), type_field('slug'))
                # Move to next module section
                if 'section' in type_fields:
                    chapter = item.get('course_name')
                    chapter_id = item.get('course_module')
                    chapter_number = 1 if not chapter_number else chapter_number + 1
                    continue
                # Skip non-lessons
                if 'lesson' not in type_fields:
                    continue
                lesson_url = urljoin(url, item.get('url'))
                if not lesson_url:
                    continue
                title = item.get('title') or item.get('lesson_name')
                description = item.get('md_desc') or clean_html(item.get('description')) or clean_html(item.get('text'))
                entries.append({
                    '_type': 'url_transparent',
                    'url': lesson_url,
                    'ie_key': LinuxAcademyIE.ie_key(),
                    'title': title,
                    'description': description,
                    'timestamp': unified_timestamp(item.get('date')) or unified_timestamp(item.get('created_on')),
                    'duration': parse_duration(item.get('duration')),
                    'chapter': chapter,
                    'chapter_id': chapter_id,
                    'chapter_number': chapter_number,
                })
            return {
                '_type': 'playlist',
                'entries': entries,
                'id': course_id,
                'title': module.get('title'),
                'description': module.get('md_desc') or clean_html(module.get('desc')),
                'duration': parse_duration(module.get('duration')),
            }

        # single video path
        # Only the first entry of player.playlist is used as the HLS manifest
        m3u8_url = self._parse_json(
            self._search_regex(
                r'player\.playlist\s*=\s*(\[.+?\])\s*;', webpage, 'playlist'),
            item_id)[0]['file']
        formats = self._extract_m3u8_formats(
            m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')
        self._sort_formats(formats)
        info = {
            'id': item_id,
            'formats': formats,
        }
        # Lesson metadata is optional: a missing or unparsable object just
        # leaves the metadata fields unset
        lesson = self._parse_json(
            self._search_regex(
                (r'window\.lesson\s*=\s*({.+?})\s*;',
                 r'player\.lesson\s*=\s*({.+?})\s*;'),
                webpage, 'lesson', default='{}'), item_id, fatal=False)
        if lesson:
            info.update({
                'title': lesson.get('lesson_name'),
                'description': lesson.get('md_desc') or clean_html(lesson.get('desc')),
                'timestamp': unified_timestamp(lesson.get('date')) or unified_timestamp(lesson.get('created_on')),
                'duration': parse_duration(lesson.get('duration')),
            })
        # Fall back to scraping the title from the page markup
        if not info.get('title'):
            info['title'] = self._search_regex(
                (r'>Lecture\s*:\s*(?P<value>[^<]+)',
                 r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
                'title', group='value')
        return info