1 from .common
import InfoExtractor
class PlatziBaseIE(InfoExtractor):
    """Common base for the Platzi extractors; holds the login routine."""

    _LOGIN_URL = 'https://platzi.com/login/'
    _NETRC_MACHINE = 'platzi'

    def _perform_login(self, username, password):
        """Post the login form and raise if the site reports a failure.

        The login page is fetched first so that its hidden inputs can be
        sent back together with the credentials.

        Raises:
            ExtractorError: when the response is still served from the
                login URL, carrying the site's own message if one is found.
        """
        form_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        form_fields = self._hidden_inputs(form_page)
        # NOTE(review): the credential field names are not confirmed from
        # this view -- verify them against the live login form.
        form_fields.update({
            'email': username,
            'password': password,
        })

        response = self._request_webpage(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata(form_fields),
            headers={'Referer': self._LOGIN_URL})

        # Being moved off the login URL is taken as a successful login.
        if 'platzi.com/login' not in response.geturl():
            return

        error_page = self._webpage_read_content(
            response, self._LOGIN_URL, None, 'Downloading login error page')

        login_state = self._parse_json(
            self._search_regex(
                r'login\s*=\s*({.+?})(?:\s*;|\s*</script)', error_page, 'login'),
            None)

        # Report the first non-empty "<kind>Error" entry, in this order.
        for kind in ('error', 'password', 'nonFields'):
            message = str_or_none(login_state.get('%sError' % kind))
            if message:
                raise ExtractorError(
                    'Unable to login: %s' % message, expected=True)

        raise ExtractorError('Unable to log in')
class PlatziIE(PlatziBaseIE):
    """Extractor for a single Platzi lecture page."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            platzi\.com/clases|           # es version
                            courses\.platzi\.com/classes  # en version
                        )/[^/]+/(?P<id>\d+)-[^/?\#&]+
                    '''

    # NOTE(review): the 'duration' values below are not confirmed from this
    # view -- verify them against a live run.
    _TESTS = [{
        'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
        'md5': '8f56448241005b561c10f11a595b37e3',
        'info_dict': {
            'id': '12074',
            'ext': 'mp4',
            'title': 'Creando nuestra primera página',
            'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
            'duration': 420,
        },
        'skip': 'Requires platzi account credentials',
    }, {
        'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
        'info_dict': {
            'id': '13430',
            'ext': 'mp4',
            'title': 'Background',
            'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
            'duration': 360,
        },
        'skip': 'Requires platzi account credentials',
        'params': {
            'skip_download': True,
        },
    }]

    def _server_formats(self, server_id, server, lecture_id):
        """Return the HLS and DASH formats advertised by one server entry.

        HLS is probed before DASH; a manifest whose URL is missing or not
        a valid URL is skipped.
        """
        found = []

        hls_url = url_or_none(server.get('hls'))
        if hls_url:
            found.extend(self._extract_m3u8_formats(
                hls_url, lecture_id, 'mp4',
                entry_protocol='m3u8_native', m3u8_id='hls',
                note='Downloading %s m3u8 information' % server_id,
                fatal=False))

        dash_url = url_or_none(server.get('dash'))
        if dash_url:
            found.extend(self._extract_mpd_formats(
                dash_url, lecture_id, mpd_id='dash',
                note='Downloading %s MPD manifest' % server_id,
                fatal=False))

        return found

    def _real_extract(self, url):
        """Build the info dict for the lecture at *url*.

        The lecture metadata is read from the ``client_data`` JSON object
        embedded in the page.
        """
        lecture_id = self._match_id(url)
        webpage = self._download_webpage(url, lecture_id)

        # client_data may itself contain "};", so the pattern that requires
        # the statement to end at a line break is tried before the loose one.
        client_data_patterns = (
            r'client_data\s*=\s*({.+?})\s*;\s*\n',
            r'client_data\s*=\s*({.+?})\s*;',
        )
        raw_client_data = self._search_regex(
            client_data_patterns, webpage, 'client data')
        data = self._parse_json(raw_client_data, lecture_id)

        material = data['initialState']['material']
        desc = material['description']
        title = desc['title']

        formats = []
        for server_id, server in material['videos'].items():
            if isinstance(server, dict):
                formats.extend(
                    self._server_formats(server_id, server, lecture_id))
        self._sort_formats(formats)

        # The description body is base64-encoded HTML.
        description = None
        content = str_or_none(desc.get('content'))
        if content:
            description = clean_html(
                compat_b64decode(content).decode('utf-8'))

        # invscale=60 multiplies the reported value by 60.
        duration = int_or_none(material.get('duration'), invscale=60)

        return {
            'id': lecture_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
        }
class PlatziCourseIE(PlatziBaseIE):
    """Extractor for a Platzi course page, returned as a playlist."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            platzi\.com/clases|           # es version
                            courses\.platzi\.com/classes  # en version
                        )/(?P<id>[^/?\#&]+)
                    '''
    _TESTS = [{
        'url': 'https://platzi.com/clases/next-js/',
        'info_dict': {
            'id': '1311',
            'title': 'Curso de Next.js',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://courses.platzi.com/classes/communication-codestream/',
        'info_dict': {
            'id': '1367',
            'title': 'Codestream Course',
        },
        'playlist_count': 14,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline any URL that PlatziIE already accepts."""
        return False if PlatziIE.suitable(url) else super(PlatziCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist with one entry per video material of the course.

        Chapters and materials that do not have the expected shape are
        skipped, as are materials whose ``material_type`` is not ``video``.
        Each entry is a ``url_transparent`` result handed over to PlatziIE.
        """
        course_name = self._match_id(url)

        webpage = self._download_webpage(url, course_name)

        props = self._parse_json(
            self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'),
            course_name)['initialProps']

        entries = []
        for chapter_num, chapter in enumerate(props['concepts'], 1):
            if not isinstance(chapter, dict):
                continue
            materials = chapter.get('materials')
            if not materials or not isinstance(materials, list):
                continue
            chapter_title = chapter.get('title')
            chapter_id = str_or_none(chapter.get('id'))
            for material in materials:
                if not isinstance(material, dict):
                    continue
                if material.get('material_type') != 'video':
                    continue
                # Material URLs may be relative; resolve against the page.
                video_url = urljoin(url, material.get('url'))
                if not video_url:
                    continue
                entries.append({
                    '_type': 'url_transparent',
                    'url': video_url,
                    'title': str_or_none(material.get('name')),
                    'id': str_or_none(material.get('id')),
                    'ie_key': PlatziIE.ie_key(),
                    'chapter': chapter_title,
                    'chapter_number': chapter_num,
                    'chapter_id': chapter_id,
                })

        # str_or_none keeps a missing id as None; compat_str would have
        # turned it into the literal playlist id 'None'.
        course_id = str_or_none(try_get(props, lambda x: x['course']['id']))
        course_title = try_get(props, lambda x: x['course']['name'], compat_str)

        return self.playlist_result(entries, course_id, course_title)