]>
Commit | Line | Data |
---|---|---|
659e93fc S |
1 | import json |
2 | import random | |
659e93fc S |
3 | |
4 | from .common import InfoExtractor | |
5 | from ..compat import ( | |
6 | compat_b64decode, | |
7 | compat_HTTPError, | |
29f7c58a | 8 | compat_str, |
659e93fc S |
9 | ) |
10 | from ..utils import ( | |
29f7c58a | 11 | clean_html, |
659e93fc | 12 | ExtractorError, |
29f7c58a | 13 | js_to_json, |
14 | parse_duration, | |
15 | try_get, | |
16 | unified_timestamp, | |
659e93fc S |
17 | urlencode_postdata, |
18 | urljoin, | |
19 | ) | |
20 | ||
21 | ||
class LinuxAcademyIE(InfoExtractor):
    """Extractor for linuxacademy.com lessons and course modules.

    A URL matching ``courses/lesson/course/<chapter_id>/lesson/<lesson_id>``
    is extracted as a single HLS video; a URL matching
    ``modules/view/id/<course_id>`` is extracted as a playlist whose entries
    point back at this extractor.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?linuxacademy\.com/cp/
                        (?:
                            courses/lesson/course/(?P<chapter_id>\d+)/lesson/(?P<lesson_id>\d+)|
                            modules/view/id/(?P<course_id>\d+)
                        )
                    '''
    _TESTS = [{
        'url': 'https://linuxacademy.com/cp/courses/lesson/course/7971/lesson/2/module/675',
        'info_dict': {
            'id': '7971-2',
            'ext': 'mp4',
            'title': 'What Is Data Science',
            'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
            'timestamp': int,  # The timestamp and upload date change between runs
            'upload_date': r're:\d+',
            'duration': 304,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires Linux Academy account credentials',
    }, {
        'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2',
        'only_matching': True,
    }, {
        'url': 'https://linuxacademy.com/cp/modules/view/id/154',
        'info_dict': {
            'id': '154',
            'title': 'AWS Certified Cloud Practitioner',
            'description': 'md5:a68a299ca9bb98d41cca5abc4d4ce22c',
            'duration': 28835,
        },
        'playlist_count': 41,
        'skip': 'Requires Linux Academy account credentials',
    }, {
        'url': 'https://linuxacademy.com/cp/modules/view/id/39',
        'info_dict': {
            'id': '39',
            'title': 'Red Hat Certified Systems Administrator - RHCSA (EX200) Exam Prep (legacy)',
            'description': 'md5:0f1d3369e90c3fb14a79813b863c902f',
            'duration': 89280,
        },
        'playlist_count': 73,
        'skip': 'Requires Linux Academy account credentials',
    }]

    _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
    _ORIGIN_URL = 'https://linuxacademy.com'
    _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx'
    _NETRC_MACHINE = 'linuxacademy'

    def _perform_login(self, username, password):
        """Log in with *username* / *password* and validate the resulting token.

        Steps, in order:
          1. Download the authorize page and decode the base64 JSON passed to
             ``atob(...)``; its ``extraParams`` member seeds the login payload.
          2. POST the credentials as JSON to ``/usernamepassword/login``.
          3. POST the hidden inputs of the returned page to ``/login/callback``.
          4. Read the access token from the final URL, falling back to the
             ``authorizationResponse`` object embedded in the callback page.
          5. Request the ``tokenValidateLogin`` page with that token.

        Raises:
            ExtractorError: with ``expected=True`` when the login endpoint
                answers 401 with a JSON body carrying ``description`` or
                ``code``; any other failure is re-raised unchanged.
        """
        # ``state`` and ``nonce`` are anti-forgery / anti-replay values, so they
        # are drawn from the OS entropy source instead of the default PRNG.
        secure_random = random.SystemRandom()

        def random_string():
            # NOTE: the alphabet is kept exactly as in the original (it has no 'W').
            return ''.join(
                secure_random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~')
                for _ in range(32))

        webpage, urlh = self._download_webpage_handle(
            self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
                'client_id': self._CLIENT_ID,
                'response_type': 'token id_token',
                'response_mode': 'web_message',
                'redirect_uri': self._ORIGIN_URL,
                'scope': 'openid email user_impersonation profile',
                'audience': self._ORIGIN_URL,
                'state': random_string(),
                'nonce': random_string(),
            })

        login_data = self._parse_json(
            self._search_regex(
                r'atob\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
                'login info', group='value'), None,
            transform_source=lambda x: compat_b64decode(x).decode('utf-8')
        )['extraParams']

        login_data.update({
            'client_id': self._CLIENT_ID,
            'redirect_uri': self._ORIGIN_URL,
            'tenant': 'lacausers',
            'connection': 'Username-Password-ACG-Proxy',
            'username': username,
            'password': password,
            'sso': 'true',
        })

        # URL the authorize request ended up at; sent as Referer on both POSTs.
        login_state_url = urlh.geturl()

        try:
            login_page = self._download_webpage(
                'https://login.linuxacademy.com/usernamepassword/login', None,
                'Downloading login page', data=json.dumps(login_data).encode(),
                headers={
                    'Content-Type': 'application/json',
                    'Origin': 'https://login.linuxacademy.com',
                    'Referer': login_state_url,
                })
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                # Parse leniently: a malformed or unexpected body must not hide
                # the original HTTP error behind a JSON error or a KeyError.
                error = self._parse_json(e.cause.read(), None, fatal=False)
                if isinstance(error, dict):
                    message = error.get('description') or error.get('code')
                    if message:
                        raise ExtractorError(
                            '%s said: %s' % (self.IE_NAME, message), expected=True)
            raise

        callback_page, urlh = self._download_webpage_handle(
            'https://login.linuxacademy.com/login/callback', None,
            'Downloading callback page',
            data=urlencode_postdata(self._hidden_inputs(login_page)),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'Origin': 'https://login.linuxacademy.com',
                'Referer': login_state_url,
            })

        access_token = self._search_regex(
            r'access_token=([^=&]+)', urlh.geturl(),
            'access token', default=None)
        if not access_token:
            # Token was not in the final URL; read it from the JS object
            # embedded in the callback page instead.
            access_token = self._parse_json(
                self._search_regex(
                    r'authorizationResponse\s*=\s*({.+?})\s*;', callback_page,
                    'authorization response'), None,
                transform_source=js_to_json)['response']['access_token']

        self._download_webpage(
            'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
            % access_token, None, 'Downloading token validation page')

    def _real_extract(self, url):
        """Dispatch *url* to the course (playlist) or lesson (video) extractor.

        The item id is the course id for module URLs and
        ``<chapter_id>-<lesson_id>`` for lesson URLs.
        """
        mobj = self._match_valid_url(url)
        chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id')
        item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id)

        webpage = self._download_webpage(url, item_id)

        if course_id:
            return self._extract_course(url, course_id, webpage)
        return self._extract_lesson(item_id, webpage)

    def _extract_course(self, url, course_id, webpage):
        """Build a playlist result from the ``window.module`` object in *webpage*."""
        module = self._parse_json(
            self._search_regex(
                r'window\.module\s*=\s*({(?:(?!};)[^"]|"([^"]|\\")*")+})\s*;', webpage, 'module'),
            course_id)

        entries = []
        # Chapter state is carried forward from the most recent "section" item.
        chapter_number = None
        chapter = None
        chapter_id = None
        for item in module['items']:
            if not isinstance(item, dict):
                continue

            # Lower-cased item['type']['name'] and item['type']['slug'];
            # '' when the value is missing or not a string.
            type_fields = tuple(
                (try_get(item, lambda x, key=key: x['type'][key], compat_str) or '').lower()
                for key in ('name', 'slug'))

            # Move to next module section
            if 'section' in type_fields:
                chapter = item.get('course_name')
                chapter_id = item.get('course_module')
                chapter_number = 1 if not chapter_number else chapter_number + 1
                continue
            # Skip non-lessons
            if 'lesson' not in type_fields:
                continue
            lesson_url = urljoin(url, item.get('url'))
            if not lesson_url:
                continue
            title = item.get('title') or item.get('lesson_name')
            description = item.get('md_desc') or clean_html(item.get('description')) or clean_html(item.get('text'))
            entries.append({
                '_type': 'url_transparent',
                'url': lesson_url,
                'ie_key': LinuxAcademyIE.ie_key(),
                'title': title,
                'description': description,
                'timestamp': unified_timestamp(item.get('date')) or unified_timestamp(item.get('created_on')),
                'duration': parse_duration(item.get('duration')),
                'chapter': chapter,
                'chapter_id': chapter_id,
                'chapter_number': chapter_number,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': course_id,
            'title': module.get('title'),
            'description': module.get('md_desc') or clean_html(module.get('desc')),
            'duration': parse_duration(module.get('duration')),
        }

    def _extract_lesson(self, item_id, webpage):
        """Build a single-video result from the player data in *webpage*."""
        # The first entry of ``player.playlist`` carries the HLS manifest URL.
        m3u8_url = self._parse_json(
            self._search_regex(
                r'player\.playlist\s*=\s*(\[.+?\])\s*;', webpage, 'playlist'),
            item_id)[0]['file']
        formats = self._extract_m3u8_formats(
            m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')
        self._sort_formats(formats)
        info = {
            'id': item_id,
            'formats': formats,
        }

        # Lesson metadata is optional: a missing or unparsable object only
        # means the title has to come from the page markup below.
        lesson = self._parse_json(
            self._search_regex(
                (r'window\.lesson\s*=\s*({.+?})\s*;',
                 r'player\.lesson\s*=\s*({.+?})\s*;'),
                webpage, 'lesson', default='{}'), item_id, fatal=False)
        if lesson:
            info.update({
                'title': lesson.get('lesson_name'),
                'description': lesson.get('md_desc') or clean_html(lesson.get('desc')),
                'timestamp': unified_timestamp(lesson.get('date')) or unified_timestamp(lesson.get('created_on')),
                'duration': parse_duration(lesson.get('duration')),
            })
        if not info.get('title'):
            info['title'] = self._search_regex(
                (r'>Lecture\s*:\s*(?P<value>[^<]+)',
                 r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
                'title', group='value')
        return info