]>
Commit | Line | Data |
---|---|---|
7fc60f4e S |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from .wistia import WistiaIE | |
7fc60f4e S |
7 | from ..utils import ( |
8 | clean_html, | |
9 | ExtractorError, | |
4560adc8 | 10 | int_or_none, |
7fc60f4e | 11 | get_element_by_class, |
4560adc8 | 12 | strip_or_none, |
7fc60f4e S |
13 | urlencode_postdata, |
14 | urljoin, | |
15 | ) | |
16 | ||
17 | ||
5ee7ae5c S |
class TeachableBaseIE(InfoExtractor):
    """Common base for the Teachable extractors.

    Holds the table of known Teachable-hosted sites, the pieces used to
    build each subclass's ``_VALID_URL``, and the per-site login routine.
    """

    _NETRC_MACHINE = 'teachable'
    # Marker put in front of a URL to route it to these extractors even
    # when its host is not listed in _SITES.
    _URL_PREFIX = 'teachable:'

    # Maps a site host name to the netrc machine name used for its credentials.
    _SITES = {
        # Only notable ones here
        'v1.upskillcourses.com': 'upskill',
        'gns3.teachable.com': 'gns3',
        'academyhacker.com': 'academyhacker',
        'stackskills.com': 'stackskills',
        'market.saleshacker.com': 'saleshacker',
        'learnability.org': 'learnability',
        'edurila.com': 'edurila',
        'courses.workitdaily.com': 'workitdaily',
    }

    # (prefix, alternation of escaped known hosts) - interpolated into the
    # two %s placeholders of each subclass's _VALID_URL template.
    _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(map(re.escape, _SITES)))

    def _real_initialize(self):
        """Start every extractor instance in the logged-out state."""
        self._logged_in = False

    def _login(self, site):
        """Sign in to ``site`` unless this instance is already logged in.

        Returns without doing anything when already logged in or when
        ``_get_login_info`` yields no username.

        Raises ExtractorError when the site demands acceptance of a new
        privacy policy, when the response carries an ``alert`` element, or
        when no logged-in marker is found after posting the form.
        """
        if self._logged_in:
            return

        netrc_machine = self._SITES.get(site, site)
        username, password = self._get_login_info(netrc_machine=netrc_machine)
        if username is None:
            return

        sign_in_url = 'https://%s/sign_in' % site
        login_page, urlh = self._download_webpage_handle(
            sign_in_url, None, 'Downloading %s login page' % site)

        # Any of these in a page is taken as proof of an authenticated session.
        logged_in_markers = (
            r'class=["\']user-signout',
            r'<a[^>]+\bhref=["\']/sign_out',
            r'Log\s+[Oo]ut\s*<',
        )

        def is_logged(webpage):
            for marker in logged_in_markers:
                if re.search(marker, webpage):
                    return True
            return False

        if is_logged(login_page):
            self._logged_in = True
            return

        # Use the final URL of the login page request as base and referer.
        login_url = urlh.geturl()

        # Start from the form's hidden inputs, then add the credentials.
        login_form = self._hidden_inputs(login_page)
        login_form['user[email]'] = username
        login_form['user[password]'] = password

        post_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>(?:(?!\1).)+)\1', login_page,
            'post url', default=login_url, group='url')
        if not post_url.startswith('http'):
            post_url = urljoin(login_url, post_url)

        request_headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': login_url,
        }
        response = self._download_webpage(
            post_url, None, 'Logging in to %s' % site,
            data=urlencode_postdata(login_form),
            headers=request_headers)

        if '>I accept the new Privacy Policy<' in response:
            raise ExtractorError(
                'Unable to login: %s asks you to accept new Privacy Policy. '
                'Go to https://%s/ and accept.' % (site, site), expected=True)

        # Successful login
        if is_logged(response):
            self._logged_in = True
            return

        # Prefer the site's own error message when one is present.
        message = get_element_by_class('alert', response)
        if message is not None:
            raise ExtractorError(
                'Unable to login: %s' % clean_html(message), expected=True)

        raise ExtractorError('Unable to log in')
102 | ||
103 | ||
5ee7ae5c S |
class TeachableIE(TeachableBaseIE):
    """Extractor for a single Teachable lecture page.

    Accepts lecture URLs on the known sites of ``TeachableBaseIE._SITES``
    as well as any ``teachable:``-prefixed lecture URL.  The Wistia embeds
    found on the page are returned as a playlist of ``url_transparent``
    entries handled by ``WistiaIE``.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        %shttps?://(?P<site_t>[^/]+)|
                        https?://(?:www\.)?(?P<site>%s)
                    )
                    /courses/[^/]+/lectures/(?P<id>\d+)
                    ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE

    _TESTS = [{
        'url': 'https://gns3.teachable.com/courses/gns3-certified-associate/lectures/6842364',
        'info_dict': {
            'id': 'untlgzk1v7',
            'ext': 'bin',
            'title': 'Overview',
            'description': 'md5:071463ff08b86c208811130ea1c2464c',
            'duration': 736.4,
            'timestamp': 1542315762,
            'upload_date': '20181115',
            'chapter': 'Welcome',
            'chapter_number': 1,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://v1.upskillcourses.com/courses/119763/lectures/1747100',
        'only_matching': True,
    }, {
        'url': 'https://gns3.teachable.com/courses/423415/lectures/6885939',
        'only_matching': True,
    }, {
        'url': 'teachable:https://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
        'only_matching': True,
    }]

    @staticmethod
    def _is_teachable(webpage):
        """Return True when ``webpage`` carries both Teachable fingerprints.

        The page must contain the ``teachableTracker.linker:autoLink``
        snippet and a ``<link>`` whose href points at a teachablecdn.com
        host (``process.fs`` or ``assets``).
        """
        return 'teachableTracker.linker:autoLink' in webpage and bool(re.search(
            r'<link[^>]+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com',
            webpage))

    @staticmethod
    def _extract_url(webpage, source_url):
        """Return ``source_url`` with the ``teachable:`` prefix, or None.

        None is returned when the page is not recognised as Teachable or
        when the URL's path does not begin with ``courses`` or ``p``.
        """
        if not TeachableIE._is_teachable(webpage):
            return
        if re.match(r'https?://[^/]+/(?:courses|p)', source_url):
            return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url)

    def _extract_chapter_info(self, webpage, video_id):
        """Return ``(chapter, chapter_number)`` for lecture ``video_id``.

        ``chapter_number`` is read from the ``data-ss-position`` attribute
        inside the ``<li>`` carrying this lecture's ``data-lecture-id``;
        ``chapter`` is the title of the ``section-title`` div at that
        1-based position.  Either value is None when it cannot be found.
        """
        chapter_number = None
        section_item = self._search_regex(
            r'(?s)(?P<li><li[^>]+\bdata-lecture-id=["\']%s[^>]+>.+?</li>)' % video_id,
            webpage, 'section item', default=None, group='li')
        if section_item:
            chapter_number = int_or_none(self._search_regex(
                r'data-ss-position=["\'](\d+)', section_item, 'section id',
                default=None))
        if chapter_number is None:
            return None, None

        sections = []
        for s in re.findall(
                r'(?s)<div[^>]+\bclass=["\']section-title[^>]+>(.+?)</div>', webpage):
            section = strip_or_none(clean_html(s))
            if not section:
                # Give up on titles entirely once one of them is empty
                # (presumably positions would no longer line up with titles).
                sections = []
                break
            sections.append(section)

        chapter = None
        # Positions are 1-based.  The lower bound matters: a position of 0
        # would otherwise index sections[-1] and report the last section.
        if 1 <= chapter_number <= len(sections):
            chapter = sections[chapter_number - 1]
        return chapter, chapter_number

    def _real_extract(self, url):
        """Extract the Wistia entries embedded in a lecture page.

        Raises a login-required error when the page shows one of the known
        "locked" markers, and ExtractorError when no Wistia URL is found.
        """
        mobj = re.match(self._VALID_URL, url)
        site = mobj.group('site') or mobj.group('site_t')
        video_id = mobj.group('id')

        self._login(site)

        # The prefix is only a routing marker; drop it before downloading.
        if url.startswith(self._URL_PREFIX):
            url = url[len(self._URL_PREFIX):]

        webpage = self._download_webpage(url, video_id)

        wistia_urls = WistiaIE._extract_urls(webpage)
        if not wistia_urls:
            if any(re.search(p, webpage) for p in (
                    r'class=["\']lecture-contents-locked',
                    r'>\s*Lecture contents locked',
                    r'id=["\']lecture-locked',
                    # https://academy.tailoredtutors.co.uk/courses/108779/lectures/1955313
                    r'class=["\'](?:inner-)?lesson-locked',
                    r'>LESSON LOCKED<')):
                self.raise_login_required('Lecture contents locked')
            raise ExtractorError('Unable to find video URL')

        title = self._og_search_title(webpage, default=None)

        chapter, chapter_number = self._extract_chapter_info(webpage, video_id)

        entries = [{
            '_type': 'url_transparent',
            'url': wistia_url,
            'ie_key': WistiaIE.ie_key(),
            'title': title,
            'chapter': chapter,
            'chapter_number': chapter_number,
        } for wistia_url in wistia_urls]

        return self.playlist_result(entries, video_id, title)
7fc60f4e S |
211 | |
212 | ||
5ee7ae5c S |
class TeachableCourseIE(TeachableBaseIE):
    """Extractor for a Teachable course page.

    Builds a playlist with one ``TeachableIE`` entry per lecture that the
    course page lists as a video.
    """

    _VALID_URL = r'''(?x)
                        (?:
                            %shttps?://(?P<site_t>[^/]+)|
                            https?://(?:www\.)?(?P<site>%s)
                        )
                        /(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+)
                    ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
    _TESTS = [{
        'url': 'http://v1.upskillcourses.com/courses/essential-web-developer-course/',
        'info_dict': {
            'id': 'essential-web-developer-course',
            'title': 'The Essential Web Developer Course (Free)',
        },
        'playlist_count': 192,
    }, {
        'url': 'http://v1.upskillcourses.com/courses/119763/',
        'only_matching': True,
    }, {
        'url': 'http://v1.upskillcourses.com/courses/enrolled/119763',
        'only_matching': True,
    }, {
        'url': 'https://gns3.teachable.com/courses/enrolled/423415',
        'only_matching': True,
    }, {
        'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini',
        'only_matching': True,
    }, {
        'url': 'teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline any URL that TeachableIE accepts; otherwise defer to the base check."""
        if TeachableIE.suitable(url):
            return False
        return super(TeachableCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist of the course's video lectures."""
        url_match = re.match(self._VALID_URL, url)
        site = url_match.group('site') or url_match.group('site_t')
        course_id = url_match.group('id')

        self._login(site)

        # Remember whether the prefix was used so entries can carry it too.
        prefixed = url.startswith(self._URL_PREFIX)
        if prefixed:
            url = url[len(self._URL_PREFIX):]

        webpage = self._download_webpage(url, course_id)

        url_base = 'https://%s/' % site

        entries = []

        section_items = re.finditer(
            r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)',
            webpage)
        for item in section_items:
            li = item.group('li')

            # Keep only items carrying the play icon or an mm:ss-like
            # duration (presumably the non-video lectures lack both).
            if not ('fa-youtube-play' in li or re.search(r'\d{1,2}:\d{2}', li)):
                continue

            lecture_url = self._search_regex(
                r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li,
                'lecture url', default=None, group='url')
            if not lecture_url:
                continue

            lecture_id = self._search_regex(
                r'/lectures/(\d+)', lecture_url, 'lecture id', default=None)
            title = self._html_search_regex(
                r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', li,
                'title', default=None)

            entry_url = urljoin(url_base, lecture_url)
            if prefixed:
                entry_url = self._URL_PREFIX + entry_url

            entry = self.url_result(
                entry_url,
                ie=TeachableIE.ie_key(), video_id=lecture_id,
                video_title=clean_html(title))
            entries.append(entry)

        course_title_patterns = (
            r'(?s)<img[^>]+class=["\']course-image[^>]+>\s*<h\d>(.+?)</h',
            r'(?s)<h\d[^>]+class=["\']course-title[^>]+>(.+?)</h',
        )
        course_title = self._html_search_regex(
            course_title_patterns, webpage, 'course title', fatal=False)

        return self.playlist_result(entries, course_id, course_title)