]>
Commit | Line | Data |
---|---|---|
dfe0a3a9 S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..compat import compat_str | |
8 | from ..utils import ( | |
9 | determine_ext, | |
10 | extract_attributes, | |
11 | ExtractorError, | |
12 | float_or_none, | |
13 | int_or_none, | |
14 | str_or_none, | |
15 | url_or_none, | |
16 | urlencode_postdata, | |
17 | urljoin, | |
18 | ) | |
19 | ||
20 | ||
class LecturioBaseIE(InfoExtractor):
    """Common base for the Lecturio extractors: handles account login."""

    _LOGIN_URL = 'https://app.lecturio.com/en/login'
    _NETRC_MACHINE = 'lecturio'

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Log in with the configured credentials, if any were supplied.

        Does nothing when no username is available. Raises ExtractorError
        when, after posting the login form, the final URL still contains
        the login URL.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        def still_on_login_page(handle):
            # The final URL of the response tells us whether the site kept
            # us on the login page or moved us elsewhere.
            return self._LOGIN_URL in compat_str(handle.geturl())

        # This first request sets some cookies
        _, popup_handle = self._download_webpage_handle(
            self._LOGIN_URL, None, 'Downloading login popup')
        if not still_on_login_page(popup_handle):
            # Already logged in
            return

        credentials = {
            'signin[email]': username,
            'signin[password]': password,
            'signin[remember]': 'on',
        }
        response, post_handle = self._download_webpage_handle(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata(credentials))
        if not still_on_login_page(post_handle):
            # Logged in successfully
            return

        # Surface the site's own error list when the page provides one.
        errors = self._html_search_regex(
            r'(?s)<ul[^>]+class=["\']error_list[^>]+>(.+?)</ul>', response,
            'errors', default=None)
        if not errors:
            raise ExtractorError('Unable to log in')
        raise ExtractorError('Unable to login: %s' % errors, expected=True)
64 | ||
65 | ||
class LecturioIE(LecturioBaseIE):
    """Extractor for single Lecturio lectures (.lecture / .vortrag URLs)."""

    _VALID_URL = r'''(?x)
                    https://
                        (?:
                            app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.lecture|
                            (?:www\.)?lecturio\.de/[^/]+/(?P<id_de>[^/?#&]+)\.vortrag
                        )
                    '''
    _TESTS = [{
        'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos',
        'md5': 'f576a797a5b7a5e4e4bbdfc25a6a6870',
        'info_dict': {
            'id': '39634',
            'ext': 'mp4',
            'title': 'Important Concepts and Terms – Introduction to Microbiology',
        },
        'skip': 'Requires lecturio account credentials',
    }, {
        'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
        'only_matching': True,
    }]

    # Maps a language name (as taken from a subtitle label) to the
    # two-letter code used as the subtitles key.
    _CC_LANGS = {
        'German': 'de',
        'English': 'en',
        'Spanish': 'es',
        'French': 'fr',
        'Polish': 'pl',
        'Russian': 'ru',
    }

    def _real_extract(self, url):
        """Return the info dict (formats, subtitles, captions) for a lecture."""
        mobj = re.match(self._VALID_URL, url)
        # Exactly one of the two named groups matches, depending on the domain.
        display_id = mobj.group('id') or mobj.group('id_de')

        webpage = self._download_webpage(
            'https://app.lecturio.com/en/lecture/%s/player.html' % display_id,
            display_id)

        lecture_id = self._search_regex(
            r'lecture_id\s*=\s*(?:L_)?(\d+)', webpage, 'lecture id')

        api_url = self._search_regex(
            r'lectureDataLink\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
            'api url', group='url')

        video = self._download_json(api_url, display_id)

        title = video['title'].strip()

        formats = []
        for format_ in video['content']['media']:
            if not isinstance(format_, dict):
                continue
            file_ = format_.get('file')
            if not file_:
                continue
            ext = determine_ext(file_)
            if ext == 'smil':
                # smil contains only broken RTMP formats anyway
                continue
            file_url = url_or_none(file_)
            if not file_url:
                continue
            label = str_or_none(format_.get('label'))
            filesize = int_or_none(format_.get('fileSize'))
            formats.append({
                'url': file_url,
                'format_id': label,
                'filesize': float_or_none(filesize, invscale=1000)
            })
        self._sort_formats(formats)

        subtitles = {}
        automatic_captions = {}
        cc = self._parse_json(
            self._search_regex(
                r'subtitleUrls\s*:\s*({.+?})\s*,', webpage, 'subtitles',
                default='{}'), display_id, fatal=False)
        # The parse above is explicitly non-fatal, so a malformed blob can
        # yield a non-dict result (None in youtube-dl's _parse_json). Treat
        # that as "no subtitles" instead of crashing on .items().
        if not isinstance(cc, dict):
            cc = {}
        for cc_label, cc_url in cc.items():
            cc_url = url_or_none(cc_url)
            if not cc_url:
                continue
            # Prefer the language code embedded in the URL path; otherwise
            # fall back to the first word of the label, or 'en'.
            lang = self._search_regex(
                r'/([a-z]{2})_', cc_url, 'lang',
                default=cc_label.split()[0] if cc_label else 'en')
            # A second two-letter code in the URL is read as the original
            # language; its presence routes the track to automatic captions.
            original_lang = self._search_regex(
                r'/[a-z]{2}_([a-z]{2})_', cc_url, 'original lang',
                default=None)
            sub_dict = (automatic_captions
                        if 'auto-translated' in cc_label or original_lang
                        else subtitles)
            sub_dict.setdefault(self._CC_LANGS.get(lang, lang), []).append({
                'url': cc_url,
            })

        return {
            'id': lecture_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'automatic_captions': automatic_captions,
        }
169 | ||
170 | ||
class LecturioCourseIE(LecturioBaseIE):
    """Playlist extractor for app.lecturio.com course pages (.course URLs)."""

    _VALID_URL = r'https://app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.course'
    _TEST = {
        'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/',
        'info_dict': {
            'id': 'microbiology-introduction',
            'title': 'Microbiology: Introduction',
        },
        'playlist_count': 45,
        'skip': 'Requires lecturio account credentials',
    }

    def _real_extract(self, url):
        """Collect every lecture linked from the course page into a playlist."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Each tag carrying a data-url that points at a *.lecture page is one
        # playlist entry; parse the attributes of every such tag.
        tag_attrs = (
            extract_attributes(tag.group(0))
            for tag in re.finditer(
                r'(?s)<[^>]+\bdata-url=(["\'])(?:(?!\1).)+\.lecture\b[^>]+>',
                webpage))
        entries = [
            self.url_result(
                urljoin(url, attrs.get('data-url')),
                ie=LecturioIE.ie_key(), video_id=attrs.get('data-id'))
            for attrs in tag_attrs]

        # The title is optional: None is passed on when the page lacks it.
        playlist_title = self._search_regex(
            r'<span[^>]+class=["\']content-title[^>]+>([^<]+)', webpage,
            'title', default=None)

        return self.playlist_result(entries, display_id, playlist_title)
386d1fea S |
203 | |
204 | ||
class LecturioDeCourseIE(LecturioBaseIE):
    """Playlist extractor for lecturio.de course pages (.kurs URLs)."""

    _VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs'
    _TEST = {
        'url': 'https://www.lecturio.de/jura/grundrechte.kurs',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Collect every *.vortrag lecture linked from the course page."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # A <td> carrying data-lecture-id, followed by an href ending in
        # .vortrag, yields the lecture id and its (possibly relative) URL.
        lecture_matches = re.finditer(
            r'(?s)<td[^>]+\bdata-lecture-id=["\'](?P<id>\d+).+?\bhref=(["\'])(?P<url>(?:(?!\2).)+\.vortrag)\b[^>]+>',
            webpage)
        entries = [
            self.url_result(
                urljoin(url, found.group('url')),
                ie=LecturioIE.ie_key(), video_id=found.group('id'))
            for found in lecture_matches]

        # The title is optional: None is passed on when no <h1> text is found.
        playlist_title = self._search_regex(
            r'<h1[^>]*>([^<]+)', webpage, 'title', default=None)

        return self.playlist_result(entries, display_id, playlist_title)