]>
Commit | Line | Data |
---|---|---|
add96eb9 | 1 | import base64 |
2 | ||
c701472f | 3 | from .common import InfoExtractor |
c701472f | 4 | from ..utils import ( |
c701472f | 5 | ExtractorError, |
e897bd82 | 6 | clean_html, |
c701472f S |
7 | int_or_none, |
8 | str_or_none, | |
9 | try_get, | |
10 | url_or_none, | |
11 | urlencode_postdata, | |
12 | urljoin, | |
13 | ) | |
14 | ||
15 | ||
class PlatziBaseIE(InfoExtractor):
    """Common base for the Platzi extractors; provides the login routine."""

    _LOGIN_URL = 'https://platzi.com/login/'
    _NETRC_MACHINE = 'platzi'

    def _perform_login(self, username, password):
        """Submit the Platzi login form with the given credentials.

        Returns None when the response URL no longer points at the login
        page. Otherwise the error page is parsed and an ExtractorError is
        raised, carrying the site-reported message when one is found.
        """
        landing_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        # Start from the hidden inputs of the login page and add credentials.
        form_fields = self._hidden_inputs(landing_page)
        form_fields['email'] = username
        form_fields['password'] = password

        response = self._request_webpage(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata(form_fields),
            headers={'Referer': self._LOGIN_URL})

        # Being sent anywhere other than the login page means login succeeded.
        if 'platzi.com/login' not in response.url:
            return

        error_page = self._webpage_read_content(
            response, self._LOGIN_URL, None, 'Downloading login error page')

        # The error page embeds a `login = {...}` JSON object in a script.
        login_json = self._search_regex(
            r'login\s*=\s*({.+?})(?:\s*;|\s*</script)', error_page, 'login')
        login = self._parse_json(login_json, None)

        # Report the first non-empty "<kind>Error" entry, in this fixed order.
        messages = (
            str_or_none(login.get(f'{kind}Error'))
            for kind in ('error', 'password', 'nonFields'))
        error = next(filter(None, messages), None)
        if error:
            raise ExtractorError(f'Unable to login: {error}', expected=True)
        raise ExtractorError('Unable to log in')
54 | ||
66d04c74 S |
55 | |
class PlatziIE(PlatziBaseIE):
    """Extractor for a single Platzi lecture page (es and en sites)."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            platzi\.com/clases|           # es version
                            courses\.platzi\.com/classes  # en version
                        )/[^/]+/(?P<id>\d+)-[^/?\#&]+
                    '''

    _TESTS = [{
        'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
        'md5': '8f56448241005b561c10f11a595b37e3',
        'info_dict': {
            'id': '12074',
            'ext': 'mp4',
            'title': 'Creando nuestra primera página',
            'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
            'duration': 420,
        },
        'skip': 'Requires platzi account credentials',
    }, {
        'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
        'info_dict': {
            'id': '13430',
            'ext': 'mp4',
            'title': 'Background',
            'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
            'duration': 360,
        },
        'skip': 'Requires platzi account credentials',
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for one lecture from its embedded client data.

        The page embeds a `client_data = {...};` JSON object; the lecture
        metadata is read from its ['initialState']['material'] entry.
        """
        lecture_id = self._match_id(url)
        webpage = self._download_webpage(url, lecture_id)

        # client_data may contain "};" so the stricter pattern (terminated
        # by a newline) is tried before the looser one.
        client_data_json = self._search_regex(
            (r'client_data\s*=\s*({.+?})\s*;\s*\n',
             r'client_data\s*=\s*({.+?})\s*;'),
            webpage, 'client data')
        data = self._parse_json(client_data_json, lecture_id)

        material = data['initialState']['material']
        desc = material['description']
        title = desc['title']

        formats = []
        for server_id, server in material['videos'].items():
            if not isinstance(server, dict):
                continue

            # Each server entry may offer an HLS and/or a DASH manifest;
            # both are optional and extracted non-fatally, HLS first.
            hls_url = url_or_none(server.get('hls'))
            if hls_url:
                formats.extend(self._extract_m3u8_formats(
                    hls_url, lecture_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id='hls',
                    note=f'Downloading {server_id} m3u8 information',
                    fatal=False))

            dash_url = url_or_none(server.get('dash'))
            if dash_url:
                formats.extend(self._extract_mpd_formats(
                    dash_url, lecture_id, mpd_id='dash',
                    note=f'Downloading {server_id} MPD manifest',
                    fatal=False))

        # The description body is base64-encoded text that is decoded as
        # UTF-8 and passed through clean_html.
        encoded_content = str_or_none(desc.get('content'))
        description = None
        if encoded_content:
            description = clean_html(
                base64.b64decode(encoded_content).decode('utf-8'))

        # NOTE(review): invscale=60 presumably converts minutes to seconds;
        # confirm against int_or_none and the site data.
        duration = int_or_none(material.get('duration'), invscale=60)

        return {
            'id': lecture_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
        }
141 | ||
142 | ||
class PlatziCourseIE(PlatziBaseIE):
    """Extractor for a Platzi course page; yields its video lectures."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            platzi\.com/clases|           # es version
                            courses\.platzi\.com/classes  # en version
                        )/(?P<id>[^/?\#&]+)
                    '''
    _TESTS = [{
        'url': 'https://platzi.com/clases/next-js/',
        'info_dict': {
            'id': '1311',
            'title': 'Curso de Next.js',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://courses.platzi.com/classes/communication-codestream/',
        'info_dict': {
            'id': '1367',
            'title': 'Codestream Course',
        },
        'playlist_count': 14,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that PlatziIE already handles (single lectures)."""
        return False if PlatziIE.suitable(url) else super().suitable(url)

    def _real_extract(self, url):
        """Return a playlist of the video materials listed on a course page.

        The page embeds a `data = {...};` JSON object whose 'initialProps'
        entry lists the course chapters ('concepts') and their materials.
        """
        course_name = self._match_id(url)

        webpage = self._download_webpage(url, course_name)

        props = self._parse_json(
            self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'),
            course_name)['initialProps']

        entries = []
        # chapter_num counts every item in 'concepts', including skipped ones.
        for chapter_num, chapter in enumerate(props['concepts'], 1):
            if not isinstance(chapter, dict):
                continue
            materials = chapter.get('materials')
            if not materials or not isinstance(materials, list):
                continue
            chapter_title = chapter.get('title')
            chapter_id = str_or_none(chapter.get('id'))
            for material in materials:
                if not isinstance(material, dict):
                    continue
                # Only video materials are turned into playlist entries.
                if material.get('material_type') != 'video':
                    continue
                video_url = urljoin(url, material.get('url'))
                if not video_url:
                    continue
                entries.append({
                    '_type': 'url_transparent',
                    'url': video_url,
                    'title': str_or_none(material.get('name')),
                    'id': str_or_none(material.get('id')),
                    'ie_key': PlatziIE.ie_key(),
                    'chapter': chapter_title,
                    'chapter_number': chapter_num,
                    'chapter_id': chapter_id,
                })

        # str_or_none keeps a missing course id as None; a plain str() would
        # turn it into the literal string 'None' and use that as playlist id.
        course_id = str_or_none(try_get(props, lambda x: x['course']['id']))
        course_title = try_get(props, lambda x: x['course']['name'], str)

        return self.playlist_result(entries, course_id, course_title)