]>
Commit | Line | Data |
---|---|---|
7fc60f4e S |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from .wistia import WistiaIE | |
7fc60f4e S |
7 | from ..utils import ( |
8 | clean_html, | |
9 | ExtractorError, | |
4560adc8 | 10 | int_or_none, |
7fc60f4e | 11 | get_element_by_class, |
4560adc8 | 12 | strip_or_none, |
7fc60f4e S |
13 | urlencode_postdata, |
14 | urljoin, | |
15 | ) | |
16 | ||
17 | ||
5ee7ae5c S |
class TeachableBaseIE(InfoExtractor):
    """Common site table and sign-in logic shared by the Teachable extractors."""

    _NETRC_MACHINE = 'teachable'
    _URL_PREFIX = 'teachable:'

    # Host name -> name passed as ``netrc_machine`` when fetching credentials.
    _SITES = {
        # Only notable ones here
        'v1.upskillcourses.com': 'upskill',
        'gns3.teachable.com': 'gns3',
        'academyhacker.com': 'academyhacker',
        'stackskills.com': 'stackskills',
        'market.saleshacker.com': 'saleshacker',
        'learnability.org': 'learnability',
        'edurila.com': 'edurila',
        'courses.workitdaily.com': 'workitdaily',
    }

    # (URL prefix, alternation of escaped known hosts); subclasses interpolate
    # this pair into the two ``%s`` slots of their ``_VALID_URL`` pattern.
    _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(map(re.escape, _SITES)))

    def _real_initialize(self):
        # Login state is tracked per extractor instance.
        self._logged_in = False

    def _login(self, site):
        """Sign in to ``site`` using the configured credentials, if any.

        Does nothing when this instance is already logged in or when no
        username is available.  Raises ExtractorError when the site asks for
        a privacy policy acceptance, shows an alert message, or when the
        response does not look like a logged-in page.
        """
        if self._logged_in:
            return

        username, password = self._get_login_info(
            netrc_machine=self._SITES.get(site, site))
        if username is None:
            return

        login_page, urlh = self._download_webpage_handle(
            'https://%s/sign_in' % site, None,
            'Downloading %s login page' % site)

        # Any one of these in a page is taken as proof of an active session.
        signed_in_markers = (
            r'class=["\']user-signout',
            r'<a[^>]+\bhref=["\']/sign_out',
            r'Log\s+[Oo]ut\s*<',
        )

        def is_logged(webpage):
            for marker in signed_in_markers:
                if re.search(marker, webpage):
                    return True
            return False

        if is_logged(login_page):
            self._logged_in = True
            return

        # Final URL of the sign-in request; used as referer and as the
        # fallback/base for the form's post URL.
        login_url = urlh.geturl()

        login_form = self._hidden_inputs(login_page)
        login_form['user[email]'] = username
        login_form['user[password]'] = password

        post_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>(?:(?!\1).)+)\1', login_page,
            'post url', default=login_url, group='url')

        # A form action that does not start with "http" is resolved against
        # the login page URL.
        if not post_url.startswith('http'):
            post_url = urljoin(login_url, post_url)

        request_headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': login_url,
        }
        response = self._download_webpage(
            post_url, None, 'Logging in to %s' % site,
            data=urlencode_postdata(login_form),
            headers=request_headers)

        if '>I accept the new Privacy Policy<' in response:
            raise ExtractorError(
                'Unable to login: %s asks you to accept new Privacy Policy. '
                'Go to https://%s/ and accept.' % (site, site), expected=True)

        # Successful login
        if is_logged(response):
            self._logged_in = True
            return

        message = get_element_by_class('alert', response)
        if message is None:
            raise ExtractorError('Unable to log in')

        raise ExtractorError(
            'Unable to login: %s' % clean_html(message), expected=True)
101 | ||
102 | ||
5ee7ae5c S |
class TeachableIE(TeachableBaseIE):
    """Extractor for a single Teachable lecture page.

    Matches either a ``teachable:``-prefixed URL on any host or a plain URL
    on one of the hosts listed in ``TeachableBaseIE._SITES``.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        %shttps?://(?P<site_t>[^/]+)|
                        https?://(?:www\.)?(?P<site>%s)
                    )
                    /courses/[^/]+/lectures/(?P<id>\d+)
                    ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE

    _TESTS = [{
        'url': 'https://gns3.teachable.com/courses/gns3-certified-associate/lectures/6842364',
        'info_dict': {
            'id': 'untlgzk1v7',
            'ext': 'bin',
            'title': 'Overview',
            'description': 'md5:071463ff08b86c208811130ea1c2464c',
            'duration': 736.4,
            'timestamp': 1542315762,
            'upload_date': '20181115',
            'chapter': 'Welcome',
            'chapter_number': 1,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://v1.upskillcourses.com/courses/119763/lectures/1747100',
        'only_matching': True,
    }, {
        'url': 'https://gns3.teachable.com/courses/423415/lectures/6885939',
        'only_matching': True,
    }, {
        'url': 'teachable:https://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
        'only_matching': True,
    }]

    @staticmethod
    def _is_teachable(webpage):
        """Return a truthy value when ``webpage`` carries both Teachable markers.

        The page must contain the tracker string and a ``<link>`` pointing at
        a teachablecdn.com host; the result is the regex match object, or a
        falsy value otherwise.
        """
        return 'teachableTracker.linker:autoLink' in webpage and re.search(
            r'<link[^>]+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com',
            webpage)

    @staticmethod
    def _extract_url(webpage, source_url):
        """Return ``source_url`` with the ``teachable:`` prefix, or None.

        A URL is only returned when ``webpage`` passes ``_is_teachable`` and
        the path of ``source_url`` starts with ``/courses`` or ``/p``.
        """
        if not TeachableIE._is_teachable(webpage):
            return
        if re.match(r'https?://[^/]+/(?:courses|p)', source_url):
            return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url)

    def _extract_chapter(self, webpage, video_id):
        """Return ``(chapter, chapter_number)`` for the lecture ``video_id``.

        ``chapter_number`` is read from the ``data-ss-position`` attribute of
        the lecture's ``<li>`` element; ``chapter`` is the matching entry of
        the page's ``section-title`` elements.  Either value is None when it
        cannot be determined.
        """
        section_item = self._search_regex(
            r'(?s)(?P<li><li[^>]+\bdata-lecture-id=["\']%s[^>]+>.+?</li>)' % video_id,
            webpage, 'section item', default=None, group='li')
        if not section_item:
            return None, None

        chapter_number = int_or_none(self._search_regex(
            r'data-ss-position=["\'](\d+)', section_item, 'section id',
            default=None))
        if chapter_number is None:
            return None, None

        sections = []
        for s in re.findall(
                r'(?s)<div[^>]+\bclass=["\']section-title[^>]+>(.+?)</div>', webpage):
            section = strip_or_none(clean_html(s))
            if not section:
                # An empty title discards the whole list, so no chapter
                # name is reported at all.
                sections = []
                break
            sections.append(section)

        chapter = None
        # Positions are used as 1-based indices.  The lower bound keeps a
        # position of 0 from indexing sections[-1] (wrong chapter, or an
        # IndexError when the list is empty).
        if 1 <= chapter_number <= len(sections):
            chapter = sections[chapter_number - 1]
        return chapter, chapter_number

    def _real_extract(self, url):
        """Return a playlist with one url_transparent entry per Wistia URL.

        Raises ExtractorError when the page contains no Wistia URL; if the
        page shows one of the known "locked" markers, a login-required error
        is requested first.
        """
        mobj = self._match_valid_url(url)
        site = mobj.group('site') or mobj.group('site_t')
        video_id = mobj.group('id')

        self._login(site)

        # Strip the "teachable:" marker before requesting the page.
        if url.startswith(self._URL_PREFIX):
            url = url[len(self._URL_PREFIX):]

        webpage = self._download_webpage(url, video_id)

        wistia_urls = WistiaIE._extract_urls(webpage)
        if not wistia_urls:
            if any(re.search(p, webpage) for p in (
                    r'class=["\']lecture-contents-locked',
                    r'>\s*Lecture contents locked',
                    r'id=["\']lecture-locked',
                    # https://academy.tailoredtutors.co.uk/courses/108779/lectures/1955313
                    r'class=["\'](?:inner-)?lesson-locked',
                    r'>LESSON LOCKED<')):
                self.raise_login_required('Lecture contents locked')
            raise ExtractorError('Unable to find video URL')

        title = self._og_search_title(webpage, default=None)

        chapter, chapter_number = self._extract_chapter(webpage, video_id)

        entries = [{
            '_type': 'url_transparent',
            'url': wistia_url,
            'ie_key': WistiaIE.ie_key(),
            'title': title,
            'chapter': chapter,
            'chapter_number': chapter_number,
        } for wistia_url in wistia_urls]

        return self.playlist_result(entries, video_id, title)
7fc60f4e S |
210 | |
211 | ||
5ee7ae5c S |
class TeachableCourseIE(TeachableBaseIE):
    """Extractor for a Teachable course page, returned as a playlist of lectures."""

    _VALID_URL = r'''(?x)
                        (?:
                            %shttps?://(?P<site_t>[^/]+)|
                            https?://(?:www\.)?(?P<site>%s)
                        )
                        /(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+)
                    ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
    _TESTS = [{
        'url': 'http://v1.upskillcourses.com/courses/essential-web-developer-course/',
        'info_dict': {
            'id': 'essential-web-developer-course',
            'title': 'The Essential Web Developer Course (Free)',
        },
        'playlist_count': 192,
    }, {
        'url': 'http://v1.upskillcourses.com/courses/119763/',
        'only_matching': True,
    }, {
        'url': 'http://v1.upskillcourses.com/courses/enrolled/119763',
        'only_matching': True,
    }, {
        'url': 'https://gns3.teachable.com/courses/enrolled/423415',
        'only_matching': True,
    }, {
        'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini',
        'only_matching': True,
    }, {
        'url': 'teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that TeachableIE accepts; otherwise defer to the parent check."""
        if TeachableIE.suitable(url):
            return False
        return super(TeachableCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        """Collect the course's lecture links and return them as a playlist."""
        url_match = self._match_valid_url(url)
        site = url_match.group('site') or url_match.group('site_t')
        course_id = url_match.group('id')

        self._login(site)

        # Remember whether the URL came in with the "teachable:" marker so the
        # same marker can be put on every lecture URL handed back.
        prefixed = url.startswith(self._URL_PREFIX)
        if prefixed:
            url = url[len(self._URL_PREFIX):]

        webpage = self._download_webpage(url, course_id)

        url_base = 'https://%s/' % site

        entries = []

        for item_match in re.finditer(
                r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)',
                webpage):
            li = item_match.group('li')
            # Keep only items that carry the play icon class or something
            # shaped like a m:ss / mm:ss timestamp.
            if not ('fa-youtube-play' in li or re.search(r'\d{1,2}:\d{2}', li)):
                continue
            lecture_url = self._search_regex(
                r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li,
                'lecture url', default=None, group='url')
            if not lecture_url:
                continue
            lecture_id = self._search_regex(
                r'/lectures/(\d+)', lecture_url, 'lecture id', default=None)
            title = self._html_search_regex(
                r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', li,
                'title', default=None)
            entry_url = urljoin(url_base, lecture_url)
            if prefixed:
                entry_url = self._URL_PREFIX + entry_url
            entry = self.url_result(
                entry_url,
                ie=TeachableIE.ie_key(), video_id=lecture_id,
                video_title=clean_html(title))
            entries.append(entry)

        course_title_patterns = (
            r'(?s)<img[^>]+class=["\']course-image[^>]+>\s*<h\d>(.+?)</h',
            r'(?s)<h\d[^>]+class=["\']course-title[^>]+>(.+?)</h',
        )
        course_title = self._html_search_regex(
            course_title_patterns, webpage, 'course title', fatal=False)

        return self.playlist_result(entries, course_id, course_title)