]>
Commit | Line | Data |
---|---|---|
1 | from .common import InfoExtractor | |
2 | from ..compat import ( | |
3 | compat_b64decode, | |
4 | compat_str, | |
5 | ) | |
6 | from ..utils import ( | |
7 | clean_html, | |
8 | ExtractorError, | |
9 | int_or_none, | |
10 | str_or_none, | |
11 | try_get, | |
12 | url_or_none, | |
13 | urlencode_postdata, | |
14 | urljoin, | |
15 | ) | |
16 | ||
17 | ||
class PlatziBaseIE(InfoExtractor):
    """Common base of the Platzi extractors; holds the login routine."""

    _LOGIN_URL = 'https://platzi.com/login/'
    # NOTE(review): presumably the .netrc machine name consulted for
    # credentials by the base extractor -- confirm against InfoExtractor.
    _NETRC_MACHINE = 'platzi'

    def _perform_login(self, username, password):
        """Post the credentials to the login form.

        Returns silently when the response URL is no longer the login page;
        otherwise parses the error payload embedded in the returned page and
        raises ExtractorError.
        """
        form_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        # Seed the POST payload with the page's hidden inputs, then add the
        # user supplied credentials on top.
        payload = self._hidden_inputs(form_page)
        payload['email'] = username
        payload['password'] = password

        response = self._request_webpage(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata(payload),
            headers={'Referer': self._LOGIN_URL})

        # login succeeded
        if 'platzi.com/login' not in response.url:
            return

        error_page = self._webpage_read_content(
            response, self._LOGIN_URL, None, 'Downloading login error page')

        raw_login = self._search_regex(
            r'login\s*=\s*({.+?})(?:\s*;|\s*</script)', error_page, 'login')
        login_state = self._parse_json(raw_login, None)

        # Report the first non-empty error field, checked in this order.
        for kind in ('error', 'password', 'nonFields'):
            message = str_or_none(login_state.get('%sError' % kind))
            if message:
                raise ExtractorError(
                    'Unable to login: %s' % message, expected=True)

        # No specific message was found in the payload.
        raise ExtractorError('Unable to log in')
56 | ||
57 | ||
class PlatziIE(PlatziBaseIE):
    """Extractor for a single Platzi lecture page."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            platzi\.com/clases|           # es version
                            courses\.platzi\.com/classes  # en version
                        )/[^/]+/(?P<id>\d+)-[^/?\#&]+
                    '''

    _TESTS = [{
        'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
        'md5': '8f56448241005b561c10f11a595b37e3',
        'info_dict': {
            'id': '12074',
            'ext': 'mp4',
            'title': 'Creando nuestra primera página',
            'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
            'duration': 420,
        },
        'skip': 'Requires platzi account credentials',
    }, {
        'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
        'info_dict': {
            'id': '13430',
            'ext': 'mp4',
            'title': 'Background',
            'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
            'duration': 360,
        },
        'skip': 'Requires platzi account credentials',
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the lecture at ``url``.

        Reads the ``client_data`` JSON object embedded in the page and
        collects HLS and DASH formats from every entry of its ``videos``
        mapping.
        """
        lecture_id = self._match_id(url)
        webpage = self._download_webpage(url, lecture_id)

        # client_data may contain "};" so that we have to try more
        # strict regex first
        client_data_patterns = (
            r'client_data\s*=\s*({.+?})\s*;\s*\n',
            r'client_data\s*=\s*({.+?})\s*;',
        )
        raw_client_data = self._search_regex(
            client_data_patterns, webpage, 'client data')
        data = self._parse_json(raw_client_data, lecture_id)

        material = data['initialState']['material']
        desc = material['description']
        title = desc['title']

        formats = []
        for server_id, server in material['videos'].items():
            if not isinstance(server, dict):
                continue

            # HLS first, then DASH, for each server entry.
            hls_url = url_or_none(server.get('hls'))
            if hls_url:
                hls_formats = self._extract_m3u8_formats(
                    hls_url, lecture_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id='hls',
                    note='Downloading %s m3u8 information' % server_id,
                    fatal=False)
                formats.extend(hls_formats)

            dash_url = url_or_none(server.get('dash'))
            if dash_url:
                dash_formats = self._extract_mpd_formats(
                    dash_url, lecture_id, mpd_id='dash',
                    note='Downloading %s MPD manifest' % server_id,
                    fatal=False)
                formats.extend(dash_formats)

        # The description body is base64 encoded HTML (decoded as UTF-8 and
        # passed through clean_html); absent content yields no description.
        description = None
        content = str_or_none(desc.get('content'))
        if content:
            decoded_html = compat_b64decode(content).decode('utf-8')
            description = clean_html(decoded_html)

        # NOTE(review): invscale=60 presumably converts minutes to seconds --
        # confirm against int_or_none.
        duration = int_or_none(material.get('duration'), invscale=60)

        return {
            'id': lecture_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
        }
143 | ||
144 | ||
class PlatziCourseIE(PlatziBaseIE):
    """Extractor for a Platzi course page; yields its video lectures as a playlist."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            platzi\.com/clases|           # es version
                            courses\.platzi\.com/classes  # en version
                        )/(?P<id>[^/?\#&]+)
                    '''
    _TESTS = [{
        'url': 'https://platzi.com/clases/next-js/',
        'info_dict': {
            'id': '1311',
            'title': 'Curso de Next.js',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://courses.platzi.com/classes/communication-codestream/',
        'info_dict': {
            'id': '1367',
            'title': 'Codestream Course',
        },
        'playlist_count': 14,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that PlatziIE accepts, so lecture URLs go to PlatziIE."""
        return False if PlatziIE.suitable(url) else super(PlatziCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist result with one entry per video material.

        The course structure is read from the ``initialProps`` member of the
        ``data`` JSON object embedded in the page. Non-dict chapters and
        materials, materials whose ``material_type`` is not ``'video'`` and
        materials without a usable URL are skipped.
        """
        course_name = self._match_id(url)

        webpage = self._download_webpage(url, course_name)

        props = self._parse_json(
            self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'),
            course_name)['initialProps']

        entries = []
        # chapter_num counts every element of 'concepts', including the ones
        # skipped below, so numbering follows the position in the page data.
        for chapter_num, chapter in enumerate(props['concepts'], 1):
            if not isinstance(chapter, dict):
                continue
            materials = chapter.get('materials')
            if not materials or not isinstance(materials, list):
                continue
            chapter_title = chapter.get('title')
            chapter_id = str_or_none(chapter.get('id'))
            for material in materials:
                if not isinstance(material, dict):
                    continue
                if material.get('material_type') != 'video':
                    continue
                # Material URLs are resolved against the course URL.
                video_url = urljoin(url, material.get('url'))
                if not video_url:
                    continue
                entries.append({
                    '_type': 'url_transparent',
                    'url': video_url,
                    'title': str_or_none(material.get('name')),
                    'id': str_or_none(material.get('id')),
                    'ie_key': PlatziIE.ie_key(),
                    'chapter': chapter_title,
                    'chapter_number': chapter_num,
                    'chapter_id': chapter_id,
                })

        # str_or_none keeps a missing course id as None; stringifying the
        # try_get result unconditionally would produce the literal 'None'
        # and use it as the playlist id.
        course_id = str_or_none(try_get(props, lambda x: x['course']['id']))
        course_title = try_get(props, lambda x: x['course']['name'], compat_str)

        return self.playlist_result(entries, course_id, course_title)