]>
Commit | Line | Data |
---|---|---|
dfe0a3a9 S |
1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
dfe0a3a9 | 4 | from ..utils import ( |
e897bd82 | 5 | ExtractorError, |
c9fa84d8 | 6 | clean_html, |
dfe0a3a9 | 7 | determine_ext, |
dfe0a3a9 S |
8 | float_or_none, |
9 | int_or_none, | |
10 | str_or_none, | |
11 | url_or_none, | |
12 | urlencode_postdata, | |
13 | urljoin, | |
14 | ) | |
15 | ||
16 | ||
class LecturioBaseIE(InfoExtractor):
    # Shared endpoints and login handling for the Lecturio extractors below.
    _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/'
    _LOGIN_URL = 'https://app.lecturio.com/en/login'
    _NETRC_MACHINE = 'lecturio'

    def _perform_login(self, username, password):
        """Log in with the given credentials.

        A response counts as "logged in" when the URL of its handle no longer
        contains the login URL. Raises ExtractorError when the credentials
        are rejected; the error is marked as expected if the page exposes an
        error list.
        """
        def still_on_login_page(handle):
            return self._LOGIN_URL in handle.url

        # The initial request sets some cookies
        _, popup_handle = self._download_webpage_handle(
            self._LOGIN_URL, None, 'Downloading login popup')

        # Already logged in
        if not still_on_login_page(popup_handle):
            return

        credentials = urlencode_postdata({
            'signin[email]': username,
            'signin[password]': password,
            'signin[remember]': 'on',
        })
        login_page, login_handle = self._download_webpage_handle(
            self._LOGIN_URL, None, 'Logging in', data=credentials)

        # Logged in successfully
        if not still_on_login_page(login_handle):
            return

        # Still on the login page: surface the site's own message if present
        error_text = self._html_search_regex(
            r'(?s)<ul[^>]+class=["\']error_list[^>]+>(.+?)</ul>', login_page,
            'errors', default=None)
        if not error_text:
            raise ExtractorError('Unable to log in')
        raise ExtractorError(f'Unable to login: {error_text}', expected=True)
54 | ||
55 | ||
class LecturioIE(LecturioBaseIE):
    # Matches single-lecture URLs on app.lecturio.com (slug or numeric id)
    # and on lecturio.de (slug only).
    _VALID_URL = r'''(?x)
                    https://
                        (?:
                            app\.lecturio\.com/([^/?#]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
                            (?:www\.)?lecturio\.de/(?:[^/?#]+/)+(?P<nt_de>[^/?#&]+)\.vortrag
                        )
                    '''
    _TESTS = [{
        'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos',
        'md5': '9a42cf1d8282a6311bf7211bbde26fde',
        'info_dict': {
            'id': '39634',
            'ext': 'mp4',
            'title': 'Important Concepts and Terms — Introduction to Microbiology',
        },
        'skip': 'Requires lecturio account credentials',
    }, {
        'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
        'only_matching': True,
    }, {
        'url': 'https://www.lecturio.de/jura/oeffentliches-recht-at-1-staatsexamen/oeffentliches-recht-staatsexamen.vortrag',
        'only_matching': True,
    }, {
        'url': 'https://app.lecturio.com/#/lecture/c/6434/39634',
        'only_matching': True,
    }]

    # Maps language names to the short codes used as subtitle keys;
    # names not listed here are used as-is.
    _CC_LANGS = {
        'Arabic': 'ar',
        'Bulgarian': 'bg',
        'German': 'de',
        'English': 'en',
        'Spanish': 'es',
        'Persian': 'fa',
        'French': 'fr',
        'Japanese': 'ja',
        'Polish': 'pl',
        'Pashto': 'ps',
        'Russian': 'ru',
    }

    def _real_extract(self, url):
        """Fetch the lecture JSON and build the info dict.

        Returns a dict with 'id', 'title', 'formats', 'subtitles' and
        'automatic_captions'. Raises KeyError if the JSON has no 'title'
        or no 'content' -> 'media' entry.
        """
        mobj = self._match_valid_url(url)
        nt = mobj.group('nt') or mobj.group('nt_de')
        lecture_id = mobj.group('id')
        display_id = nt or lecture_id
        # Numeric ids and slugs are served from different API paths
        api_path = 'lectures/' + lecture_id if lecture_id else 'lecture/' + nt + '.json'
        video = self._download_json(
            self._API_BASE_URL + api_path, display_id)
        title = video['title'].strip()
        if not lecture_id:
            # Slug URLs carry no numeric id; take it from the second part of
            # a '<prefix>_<id>' style productId/uid when it has that shape
            pid = video.get('productId') or video.get('uid')
            if pid:
                spid = pid.split('_')
                if spid and len(spid) == 2:
                    lecture_id = spid[1]

        formats = []
        for format_ in video['content']['media']:
            if not isinstance(format_, dict):
                continue
            file_ = format_.get('file')
            if not file_:
                continue
            ext = determine_ext(file_)
            if ext == 'smil':
                # smil contains only broken RTMP formats anyway
                continue
            file_url = url_or_none(file_)
            if not file_url:
                continue
            label = str_or_none(format_.get('label'))
            filesize = int_or_none(format_.get('fileSize'))
            f = {
                'url': file_url,
                'format_id': label,
                'filesize': float_or_none(filesize, invscale=1000),
            }
            if label:
                # Labels shaped like '720p (HD)' yield a height and a
                # shorter format id
                label_mobj = re.match(r'(\d+)p\s*\(([^)]+)\)', label)
                if label_mobj:
                    f.update({
                        'format_id': label_mobj.group(2),
                        'height': int(label_mobj.group(1)),
                    })
            formats.append(f)

        subtitles = {}
        automatic_captions = {}
        for cc in video.get('captions') or []:
            cc_url = cc.get('url')
            if not cc_url:
                continue
            # 'translatedCode' may be missing or null; normalise to '' so the
            # membership test below cannot raise TypeError on None
            cc_label = cc.get('translatedCode') or ''
            # A whitespace-only label has no first word to fall back on
            label_words = cc_label.split()
            lang = cc.get('languageCode') or self._search_regex(
                r'/([a-z]{2})_', cc_url, 'lang',
                default=label_words[0] if label_words else 'en')
            original_lang = self._search_regex(
                r'/[a-z]{2}_([a-z]{2})_', cc_url, 'original lang',
                default=None)
            # Captions labelled auto-translated, or whose URL names a second
            # language code, are filed as automatic captions
            sub_dict = (automatic_captions
                        if 'auto-translated' in cc_label or original_lang
                        else subtitles)
            sub_dict.setdefault(self._CC_LANGS.get(lang, lang), []).append({
                'url': cc_url,
            })

        return {
            'id': lecture_id or nt,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'automatic_captions': automatic_captions,
        }
172 | ||
173 | ||
class LecturioCourseIE(LecturioBaseIE):
    # Matches course URLs on app.lecturio.com, by slug ('nt') or numeric id.
    _VALID_URL = r'https?://app\.lecturio\.com/(?:[^/]+/(?P<nt>[^/?#&]+)\.course|(?:#/)?course/c/(?P<id>\d+))'
    _TESTS = [{
        'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/',
        'info_dict': {
            'id': 'microbiology-introduction',
            'title': 'Microbiology: Introduction',
            'description': 'md5:13da8500c25880c6016ae1e6d78c386a',
        },
        'playlist_count': 45,
        'skip': 'Requires lecturio account credentials',
    }, {
        'url': 'https://app.lecturio.com/#/course/c/6434',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the course JSON and return a playlist of its lectures.

        Each lecture becomes a url_result handled by LecturioIE. Lectures
        that have no 'url' and for which no numeric lecture URL can be
        built are skipped.
        """
        nt, course_id = self._match_valid_url(url).groups()
        display_id = nt or course_id
        # Numeric ids and slugs are served from different API paths
        api_path = 'courses/' + course_id if course_id else 'course/content/' + nt + '.json'
        course = self._download_json(
            self._API_BASE_URL + api_path, display_id)
        entries = []
        # 'or []' also covers a 'lectures' key that is present but null
        for lecture in course.get('lectures') or []:
            lecture_id = str_or_none(lecture.get('id'))
            lecture_url = lecture.get('url')
            if lecture_url:
                lecture_url = urljoin(url, lecture_url)
            elif course_id and lecture_id:
                lecture_url = f'https://app.lecturio.com/#/lecture/c/{course_id}/{lecture_id}'
            else:
                # Without both numeric ids the fallback URL would contain
                # the text 'None', which LecturioIE cannot match
                continue
            entries.append(self.url_result(
                lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
        return self.playlist_result(
            entries, display_id, course.get('title'),
            clean_html(course.get('description')))
386d1fea S |
209 | |
210 | ||
class LecturioDeCourseIE(LecturioBaseIE):
    # Matches course ('.kurs') pages on lecturio.de.
    _VALID_URL = r'https?://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs'
    _TEST = {
        'url': 'https://www.lecturio.de/jura/grundrechte.kurs',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Scrape the course page and return a playlist of its lectures.

        Every table cell carrying a data-lecture-id and followed by a link
        ending in '.vortrag' becomes a url_result handled by LecturioIE.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # One entry per lecture cell; links are resolved against the page URL
        lecture_rows = re.finditer(
            r'(?s)<td[^>]+\bdata-lecture-id=["\'](?P<id>\d+).+?\bhref=(["\'])(?P<url>(?:(?!\2).)+\.vortrag)\b[^>]+>',
            webpage)
        entries = [
            self.url_result(
                urljoin(url, row.group('url')),
                ie=LecturioIE.ie_key(), video_id=row.group('id'))
            for row in lecture_rows
        ]

        # The playlist title is the text of the first <h1>, if there is one
        playlist_title = self._search_regex(
            r'<h1[^>]*>([^<]+)', webpage, 'title', default=None)

        return self.playlist_result(entries, display_id, playlist_title)