]>
Commit | Line | Data |
---|---|---|
c701472f S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..compat import ( | |
6 | compat_b64decode, | |
7 | compat_str, | |
8 | ) | |
9 | from ..utils import ( | |
10 | clean_html, | |
11 | ExtractorError, | |
12 | int_or_none, | |
13 | str_or_none, | |
14 | try_get, | |
15 | url_or_none, | |
16 | urlencode_postdata, | |
17 | urljoin, | |
18 | ) | |
19 | ||
20 | ||
class PlatziBaseIE(InfoExtractor):
    """Common base for the Platzi extractors; logs in during initialization."""

    _LOGIN_URL = 'https://platzi.com/login/'
    # NOTE(review): presumably the machine name looked up for stored
    # credentials by the base class - confirm against InfoExtractor.
    _NETRC_MACHINE = 'platzi'

    def _real_initialize(self):
        """Perform the login step as part of extractor initialization."""
        self._login()

    def _login(self):
        """Post the Platzi login form with the configured credentials.

        Returns without doing anything when ``_get_login_info()`` yields no
        username. Otherwise the hidden inputs of the login page are sent back
        together with the ``email`` and ``password`` fields.

        Raises:
            ExtractorError: if the response still points at the login page.
                The message carries the first non-empty ``errorError``,
                ``passwordError`` or ``nonFieldsError`` value found in the
                ``login`` JSON object embedded in that page, when there is
                one.
        """
        email, secret = self._get_login_info()
        if email is None:
            return

        form_fields = self._hidden_inputs(self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page'))
        form_fields['email'] = email
        form_fields['password'] = secret

        response = self._request_webpage(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata(form_fields),
            headers={'Referer': self._LOGIN_URL})

        # Ending up anywhere other than the login page is treated as success.
        if 'platzi.com/login' not in response.geturl():
            return

        error_page = self._webpage_read_content(
            response, self._LOGIN_URL, None, 'Downloading login error page')
        login_state_json = self._search_regex(
            r'login\s*=\s*({.+?})(?:\s*;|\s*</script)', error_page, 'login')
        login_state = self._parse_json(login_state_json, None)

        # Lazily walk the known error fields and stop at the first one set.
        reasons = (
            str_or_none(login_state.get('%sError' % prefix))
            for prefix in ('error', 'password', 'nonFields'))
        reason = next((text for text in reasons if text), None)
        if reason:
            raise ExtractorError(
                'Unable to login: %s' % reason, expected=True)
        raise ExtractorError('Unable to log in')
66 | ||
66d04c74 S |
67 | |
class PlatziIE(PlatziBaseIE):
    """Extractor for a single Platzi lecture page."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            platzi\.com/clases|           # es version
                            courses\.platzi\.com/classes  # en version
                        )/[^/]+/(?P<id>\d+)-[^/?\#&]+
                    '''

    _TESTS = [{
        'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
        'md5': '8f56448241005b561c10f11a595b37e3',
        'info_dict': {
            'id': '12074',
            'ext': 'mp4',
            'title': 'Creando nuestra primera página',
            'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
            'duration': 420,
        },
        'skip': 'Requires platzi account credentials',
    }, {
        'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
        'info_dict': {
            'id': '13430',
            'ext': 'mp4',
            'title': 'Background',
            'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
            'duration': 360,
        },
        'skip': 'Requires platzi account credentials',
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the lecture addressed by *url*.

        The lecture metadata is read from the ``client_data`` JSON object
        embedded in the page (``initialState`` -> ``material``). For every
        entry of ``material['videos']`` that is a dict, its ``hls`` and
        ``dash`` URLs (when valid) are expanded into formats.

        Returns:
            A dict with ``id``, ``title``, ``description``, ``duration`` and
            ``formats``.
        """
        lecture_id = self._match_id(url)
        page = self._download_webpage(url, lecture_id)

        # client_data may contain "};" so the stricter pattern (terminated by
        # a newline) is tried before the permissive one.
        client_data_json = self._search_regex(
            (r'client_data\s*=\s*({.+?})\s*;\s*\n',
             r'client_data\s*=\s*({.+?})\s*;'),
            page, 'client data')
        client_data = self._parse_json(client_data_json, lecture_id)

        material = client_data['initialState']['material']
        desc = material['description']
        title = desc['title']

        formats = []
        for server_id, server in material['videos'].items():
            if not isinstance(server, dict):
                continue

            hls_url = url_or_none(server.get('hls'))
            if hls_url:
                formats.extend(self._extract_m3u8_formats(
                    hls_url, lecture_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id='hls',
                    note='Downloading %s m3u8 information' % server_id,
                    fatal=False))

            dash_url = url_or_none(server.get('dash'))
            if dash_url:
                formats.extend(self._extract_mpd_formats(
                    dash_url, lecture_id, mpd_id='dash',
                    note='Downloading %s MPD manifest' % server_id,
                    fatal=False))
        self._sort_formats(formats)

        # The description body is base64-decoded and then HTML-cleaned.
        description = None
        encoded_content = str_or_none(desc.get('content'))
        if encoded_content:
            decoded_content = compat_b64decode(encoded_content).decode('utf-8')
            description = clean_html(decoded_content)

        # NOTE(review): invscale=60 suggests the page reports minutes and the
        # result is seconds - confirm against int_or_none.
        duration = int_or_none(material.get('duration'), invscale=60)

        return {
            'id': lecture_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
        }
154 | ||
155 | ||
class PlatziCourseIE(PlatziBaseIE):
    """Playlist extractor for a Platzi course page."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            platzi\.com/clases|           # es version
                            courses\.platzi\.com/classes  # en version
                        )/(?P<id>[^/?\#&]+)
                    '''
    _TESTS = [{
        'url': 'https://platzi.com/clases/next-js/',
        'info_dict': {
            'id': '1311',
            'title': 'Curso de Next.js',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://courses.platzi.com/classes/communication-codestream/',
        'info_dict': {
            'id': '1367',
            'title': 'Codestream Course',
        },
        'playlist_count': 14,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that PlatziIE accepts; otherwise use the default match.

        The course pattern is a prefix of the lecture pattern, so lecture
        URLs have to be excluded explicitly.
        """
        return False if PlatziIE.suitable(url) else super(PlatziCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build a playlist of the video lectures listed on a course page.

        The course data is read from the ``data`` JSON object embedded in the
        page (``initialProps``). Every material of type ``video`` that has a
        resolvable URL becomes a ``url_transparent`` entry handled by
        PlatziIE and tagged with its chapter title, number and id.

        Returns:
            The result of ``playlist_result`` for the collected entries, with
            the course id and name taken from ``initialProps['course']``
            (each is None when unavailable).
        """
        course_name = self._match_id(url)

        webpage = self._download_webpage(url, course_name)

        props = self._parse_json(
            self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'),
            course_name)['initialProps']

        entries = []
        # Chapter numbers follow the position in the page data, so chapters
        # skipped below still consume a number.
        for chapter_num, chapter in enumerate(props['concepts'], 1):
            if not isinstance(chapter, dict):
                continue
            materials = chapter.get('materials')
            if not materials or not isinstance(materials, list):
                continue
            chapter_title = chapter.get('title')
            chapter_id = str_or_none(chapter.get('id'))
            for material in materials:
                if not isinstance(material, dict):
                    continue
                if material.get('material_type') != 'video':
                    continue
                video_url = urljoin(url, material.get('url'))
                if not video_url:
                    continue
                entries.append({
                    '_type': 'url_transparent',
                    'url': video_url,
                    'title': str_or_none(material.get('name')),
                    'id': str_or_none(material.get('id')),
                    'ie_key': PlatziIE.ie_key(),
                    'chapter': chapter_title,
                    'chapter_number': chapter_num,
                    'chapter_id': chapter_id,
                })

        # str_or_none keeps a missing id as None; compat_str(None) would
        # produce the literal string 'None' as the playlist id.
        course_id = str_or_none(try_get(props, lambda x: x['course']['id']))
        course_title = try_get(props, lambda x: x['course']['name'], compat_str)

        return self.playlist_result(entries, course_id, course_title)