]>
Commit | Line | Data |
---|---|---|
c701472f S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..compat import ( | |
6 | compat_b64decode, | |
7 | compat_str, | |
8 | ) | |
9 | from ..utils import ( | |
10 | clean_html, | |
11 | ExtractorError, | |
12 | int_or_none, | |
13 | str_or_none, | |
14 | try_get, | |
15 | url_or_none, | |
16 | urlencode_postdata, | |
17 | urljoin, | |
18 | ) | |
19 | ||
20 | ||
class PlatziIE(InfoExtractor):
    # Extractor for a single Platzi lecture page; _VALID_URL covers both the
    # Spanish (platzi.com/clases) and English (courses.platzi.com/classes)
    # URL layouts and captures the numeric lecture id.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            platzi\.com/clases|           # es version
                            courses\.platzi\.com/classes  # en version
                        )/[^/]+/(?P<id>\d+)-[^/?\#&]+
                    '''
    _LOGIN_URL = 'https://platzi.com/login/'
    _NETRC_MACHINE = 'platzi'

    _TESTS = [{
        'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
        'md5': '8f56448241005b561c10f11a595b37e3',
        'info_dict': {
            'id': '12074',
            'ext': 'mp4',
            'title': 'Creando nuestra primera página',
            'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
            'duration': 420,
        },
        'skip': 'Requires platzi account credentials',
    }, {
        'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
        'info_dict': {
            'id': '13430',
            'ext': 'mp4',
            'title': 'Background',
            'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
            'duration': 360,
        },
        'skip': 'Requires platzi account credentials',
        'params': {
            'skip_download': True,
        },
    }]

    def _real_initialize(self):
        """Attempt to log in before any extraction takes place."""
        self._login()

    def _login(self):
        """Log in with the configured credentials, if any.

        Does nothing when no username is available. Otherwise submits the
        login form (hidden inputs of the login page plus email/password) and
        treats a final URL outside ``platzi.com/login`` as success.

        Raises ExtractorError when the site keeps us on the login page; the
        message embedded in the page's ``login`` object is used when present.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        # Carry over hidden inputs from the login page so they are posted
        # back together with the credentials
        login_form = self._hidden_inputs(login_page)

        login_form.update({
            'email': username,
            'password': password,
        })

        urlh = self._request_webpage(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata(login_form),
            headers={'Referer': self._LOGIN_URL})

        # login succeeded
        if 'platzi.com/login' not in compat_str(urlh.geturl()):
            return

        # Still on the login page: read the response body and look for the
        # error details embedded in it
        login_error = self._webpage_read_content(
            urlh, self._LOGIN_URL, None, 'Downloading login error page')

        login = self._parse_json(
            self._search_regex(
                r'login\s*=\s*({.+?})(?:\s*;|\s*</script)', login_error, 'login'),
            None)

        # Report the first non-empty of errorError/passwordError/nonFieldsError
        for kind in ('error', 'password', 'nonFields'):
            error = str_or_none(login.get('%sError' % kind))
            if error:
                raise ExtractorError(
                    'Unable to login: %s' % error, expected=True)
        raise ExtractorError('Unable to log in')

    def _real_extract(self, url):
        """Extract formats and metadata for the lecture at ``url``.

        Returns an info dict with ``id``, ``title``, ``description``,
        ``duration`` and ``formats``. Title and the ``videos`` mapping are
        mandatory (a missing key raises); description and duration are
        optional and become None when unavailable.
        """
        lecture_id = self._match_id(url)

        webpage = self._download_webpage(url, lecture_id)

        data = self._parse_json(
            self._search_regex(
                # client_data may itself contain "};", which makes the lazy
                # pattern stop too early and yields truncated JSON, so try
                # the stricter newline-terminated pattern first
                (r'client_data\s*=\s*({.+?})\s*;\s*\n',
                 r'client_data\s*=\s*({.+?})\s*;'),
                webpage, 'client data'),
            lecture_id)

        material = data['initialState']['material']
        desc = material['description']
        title = desc['title']

        formats = []
        for server_id, server in material['videos'].items():
            if not isinstance(server, dict):
                continue
            for format_id in ('hls', 'dash'):
                format_url = url_or_none(server.get(format_id))
                if not format_url:
                    continue
                if format_id == 'hls':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, lecture_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id=format_id,
                        note='Downloading %s m3u8 information' % server_id,
                        fatal=False))
                elif format_id == 'dash':
                    formats.extend(self._extract_mpd_formats(
                        format_url, lecture_id, mpd_id=format_id,
                        note='Downloading %s MPD manifest' % server_id,
                        fatal=False))
        self._sort_formats(formats)

        # The description body is base64-encoded; it is optional metadata, so
        # a malformed payload must not abort an otherwise working extraction
        description = None
        content = str_or_none(desc.get('content'))
        if content:
            try:
                description = clean_html(
                    compat_b64decode(content).decode('utf-8'))
            except (TypeError, ValueError):
                # Invalid base64 or invalid UTF-8: leave description unset
                description = None

        # Scaled by 60 via invscale; presumably the site reports minutes
        # (the tests above expect 420 and 360) - TODO confirm
        duration = int_or_none(material.get('duration'), invscale=60)

        return {
            'id': lecture_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
        }
147 | ||
148 | ||
class PlatziCourseIE(InfoExtractor):
    # Extractor for a Platzi course page; produces a playlist whose entries
    # are delegated to PlatziIE.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            platzi\.com/clases|           # es version
                            courses\.platzi\.com/classes  # en version
                        )/(?P<id>[^/?\#&]+)
                    '''
    _TESTS = [{
        'url': 'https://platzi.com/clases/next-js/',
        'info_dict': {
            'id': '1311',
            'title': 'Curso de Next.js',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://courses.platzi.com/classes/communication-codestream/',
        'info_dict': {
            'id': '1367',
            'title': 'Codestream Course',
        },
        'playlist_count': 14,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs PlatziIE accepts, else the default verdict.

        Lecture URLs start with the same path as course URLs, so they are
        handed to PlatziIE rather than being treated as courses.
        """
        return False if PlatziIE.suitable(url) else super(PlatziCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build a playlist of the video lectures listed on a course page.

        Only materials whose ``material_type`` is ``'video'`` and that carry
        a usable URL become entries; each entry is a ``url_transparent``
        result handled by PlatziIE and annotated with chapter metadata.
        """
        course_name = self._match_id(url)

        webpage = self._download_webpage(url, course_name)

        props = self._parse_json(
            self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'),
            course_name)['initialProps']

        entries = []
        # chapter_num is the 1-based position within props['concepts'],
        # counted before any chapter is skipped
        for chapter_num, chapter in enumerate(props['concepts'], 1):
            if not isinstance(chapter, dict):
                continue
            materials = chapter.get('materials')
            if not materials or not isinstance(materials, list):
                continue
            chapter_title = chapter.get('title')
            chapter_id = str_or_none(chapter.get('id'))
            for material in materials:
                if not isinstance(material, dict):
                    continue
                if material.get('material_type') != 'video':
                    continue
                # Material URLs are resolved against the course page URL
                video_url = urljoin(url, material.get('url'))
                if not video_url:
                    continue
                entries.append({
                    '_type': 'url_transparent',
                    'url': video_url,
                    'title': str_or_none(material.get('name')),
                    'id': str_or_none(material.get('id')),
                    'ie_key': PlatziIE.ie_key(),
                    'chapter': chapter_title,
                    'chapter_number': chapter_num,
                    'chapter_id': chapter_id,
                })

        # str_or_none keeps a missing course id as None; compat_str would
        # turn it into the literal playlist id 'None'
        course_id = str_or_none(try_get(props, lambda x: x['course']['id']))
        course_title = try_get(props, lambda x: x['course']['name'], compat_str)

        return self.playlist_result(entries, course_id, course_title)