]>
Commit | Line | Data |
---|---|---|
7fc60f4e S |
1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | from .wistia import WistiaIE | |
7fc60f4e | 5 | from ..utils import ( |
7fc60f4e | 6 | ExtractorError, |
e897bd82 | 7 | clean_html, |
7fc60f4e | 8 | get_element_by_class, |
e897bd82 | 9 | int_or_none, |
4560adc8 | 10 | strip_or_none, |
7fc60f4e S |
11 | urlencode_postdata, |
12 | urljoin, | |
13 | ) | |
14 | ||
15 | ||
5ee7ae5c S |
class TeachableBaseIE(InfoExtractor):
    """Common base for the Teachable extractors: site table, URL prefix and login."""

    _NETRC_MACHINE = 'teachable'
    _URL_PREFIX = 'teachable:'

    # Maps a known host name to the netrc machine name used when looking up
    # credentials for it (see _login).
    _SITES = {
        # Only notable ones here
        'v1.upskillcourses.com': 'upskill',
        'gns3.teachable.com': 'gns3',
        'academyhacker.com': 'academyhacker',
        'stackskills.com': 'stackskills',
        'market.saleshacker.com': 'saleshacker',
        'learnability.org': 'learnability',
        'edurila.com': 'edurila',
        'courses.workitdaily.com': 'workitdaily',
    }

    # (prefix, alternation of escaped host names) -- substituted into the
    # _VALID_URL templates of the subclasses.
    _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(map(re.escape, _SITES)))

    def _real_initialize(self):
        self._logged_in = False

    def _login(self, site):
        """Log in to *site* unless already logged in or no username is available.

        Sets ``self._logged_in`` on success.  Raises ExtractorError when the
        response asks to accept a new Privacy Policy, contains an ``alert``
        element, or shows none of the signed-in markers.
        """
        if self._logged_in:
            return

        # Hosts listed in _SITES use their short machine name; any other host
        # is looked up under its own name.
        machine = self._SITES.get(site, site)
        username, password = self._get_login_info(netrc_machine=machine)
        if username is None:
            return

        login_page, urlh = self._download_webpage_handle(
            f'https://{site}/sign_in', None,
            f'Downloading {site} login page')

        signed_in_markers = (
            r'class=["\']user-signout',
            r'<a[^>]+\bhref=["\']/sign_out',
            r'Log\s+[Oo]ut\s*<',
        )

        def is_logged(webpage):
            for marker in signed_in_markers:
                if re.search(marker, webpage):
                    return True
            return False

        # The sign-in page itself may already show a signed-in marker.
        if is_logged(login_page):
            self._logged_in = True
            return

        login_url = urlh.url

        login_form = {
            **self._hidden_inputs(login_page),
            'user[email]': username,
            'user[password]': password,
        }

        # Prefer the form's own action; fall back to the final login page URL.
        post_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>(?:(?!\1).)+)\1', login_page,
            'post url', default=login_url, group='url')
        if not post_url.startswith('http'):
            post_url = urljoin(login_url, post_url)

        request_headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': login_url,
        }
        response = self._download_webpage(
            post_url, None, f'Logging in to {site}',
            data=urlencode_postdata(login_form),
            headers=request_headers)

        if '>I accept the new Privacy Policy<' in response:
            raise ExtractorError(
                f'Unable to login: {site} asks you to accept new Privacy Policy. '
                f'Go to https://{site}/ and accept.', expected=True)

        # Successful login
        if is_logged(response):
            self._logged_in = True
            return

        # Surface the site's own alert text when there is one.
        message = get_element_by_class('alert', response)
        if message is not None:
            raise ExtractorError(
                f'Unable to login: {clean_html(message)}', expected=True)

        raise ExtractorError('Unable to log in')
99 | ||
100 | ||
class TeachableIE(TeachableBaseIE):
    """Extractor for a single lecture page (``/courses/<course>/lectures/<id>``).

    Accepts either a URL on one of the hosts in ``_SITES`` or any host when the
    URL carries the ``teachable:`` prefix.  The lecture's Wistia embeds are
    returned as a playlist of ``url_transparent`` entries.
    """

    _WORKING = False
    _VALID_URL = r'''(?x)
                    (?:
                        {}https?://(?P<site_t>[^/]+)|
                        https?://(?:www\.)?(?P<site>{})
                    )
                    /courses/[^/]+/lectures/(?P<id>\d+)
                    '''.format(*TeachableBaseIE._VALID_URL_SUB_TUPLE)

    _TESTS = [{
        'url': 'https://gns3.teachable.com/courses/gns3-certified-associate/lectures/6842364',
        'info_dict': {
            'id': 'untlgzk1v7',
            'ext': 'bin',
            'title': 'Overview',
            'description': 'md5:071463ff08b86c208811130ea1c2464c',
            'duration': 736.4,
            'timestamp': 1542315762,
            'upload_date': '20181115',
            'chapter': 'Welcome',
            'chapter_number': 1,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://v1.upskillcourses.com/courses/119763/lectures/1747100',
        'only_matching': True,
    }, {
        'url': 'https://gns3.teachable.com/courses/423415/lectures/6885939',
        'only_matching': True,
    }, {
        'url': 'teachable:https://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
        'only_matching': True,
    }]

    @staticmethod
    def _is_teachable(webpage):
        """Return a truthy value when *webpage* carries both Teachable markers.

        Both the tracker string and a ``<link>`` to a teachablecdn.com host
        must be present.
        """
        return 'teachableTracker.linker:autoLink' in webpage and re.search(
            r'<link[^>]+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com',
            webpage)

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield the prefixed page URL for Teachable pages, then stop extraction.

        On a page recognised by ``_is_teachable`` the URL is yielded (with the
        ``teachable:`` prefix) only when its path starts with ``/courses`` or
        ``/p``; ``cls.StopExtraction`` is raised either way.
        """
        if cls._is_teachable(webpage):
            if re.match(r'https?://[^/]+/(?:courses|p)', url):
                yield f'{cls._URL_PREFIX}{url}'
            raise cls.StopExtraction

    def _extract_chapter_info(self, webpage, video_id):
        """Return ``(chapter, chapter_number)`` for the lecture; either may be None.

        ``chapter_number`` is read from the ``data-ss-position`` attribute of
        the lecture's ``<li>``; ``chapter`` is the matching ``section-title``
        text, looked up by treating that position as 1-based.
        """
        section_item = self._search_regex(
            rf'(?s)(?P<li><li[^>]+\bdata-lecture-id=["\']{video_id}[^>]+>.+?</li>)',
            webpage, 'section item', default=None, group='li')
        if not section_item:
            return None, None

        chapter_number = int_or_none(self._search_regex(
            r'data-ss-position=["\'](\d+)', section_item, 'section id',
            default=None))
        if chapter_number is None:
            return None, None

        sections = []
        for s in re.findall(
                r'(?s)<div[^>]+\bclass=["\']section-title[^>]+>(.+?)</div>', webpage):
            section = strip_or_none(clean_html(s))
            if not section:
                # An empty title makes positional lookup unreliable; give up
                # on the chapter name altogether.
                sections = []
                break
            sections.append(section)

        chapter = None
        # The lower bound matters: a position of 0 would otherwise index
        # sections[-1] (wrong chapter, or IndexError on an empty list).
        if 1 <= chapter_number <= len(sections):
            chapter = sections[chapter_number - 1]
        return chapter, chapter_number

    def _real_extract(self, url):
        """Extract the Wistia entries of one lecture as a playlist.

        Raises a login-required error when the page shows a locked-lecture
        marker, and ExtractorError when no Wistia URL is found otherwise.
        """
        mobj = self._match_valid_url(url)
        site = mobj.group('site') or mobj.group('site_t')
        video_id = mobj.group('id')

        self._login(site)

        prefixed = url.startswith(self._URL_PREFIX)
        if prefixed:
            url = url[len(self._URL_PREFIX):]

        webpage = self._download_webpage(url, video_id)

        wistia_urls = WistiaIE._extract_embed_urls(url, webpage)
        if not wistia_urls:
            if any(re.search(p, webpage) for p in (
                    r'class=["\']lecture-contents-locked',
                    r'>\s*Lecture contents locked',
                    r'id=["\']lecture-locked',
                    # https://academy.tailoredtutors.co.uk/courses/108779/lectures/1955313
                    r'class=["\'](?:inner-)?lesson-locked',
                    r'>LESSON LOCKED<')):
                self.raise_login_required('Lecture contents locked')
            raise ExtractorError('Unable to find video URL')

        title = self._og_search_title(webpage, default=None)

        chapter, chapter_number = self._extract_chapter_info(webpage, video_id)

        entries = [{
            '_type': 'url_transparent',
            'url': wistia_url,
            'ie_key': WistiaIE.ie_key(),
            'title': title,
            'chapter': chapter,
            'chapter_number': chapter_number,
        } for wistia_url in wistia_urls]

        return self.playlist_result(entries, video_id, title)
7fc60f4e S |
209 | |
210 | ||
5ee7ae5c S |
class TeachableCourseIE(TeachableBaseIE):
    """Extractor for a course page (``/courses/<id>`` or ``/p/<id>``).

    Produces a playlist whose entries point at the individual lecture pages,
    each delegated to TeachableIE.
    """

    _VALID_URL = r'''(?x)
                        (?:
                            {}https?://(?P<site_t>[^/]+)|
                            https?://(?:www\.)?(?P<site>{})
                        )
                        /(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+)
                    '''.format(*TeachableBaseIE._VALID_URL_SUB_TUPLE)
    _TESTS = [{
        'url': 'http://v1.upskillcourses.com/courses/essential-web-developer-course/',
        'info_dict': {
            'id': 'essential-web-developer-course',
            'title': 'The Essential Web Developer Course (Free)',
        },
        'playlist_count': 192,
    }, {
        'url': 'http://v1.upskillcourses.com/courses/119763/',
        'only_matching': True,
    }, {
        'url': 'http://v1.upskillcourses.com/courses/enrolled/119763',
        'only_matching': True,
    }, {
        'url': 'https://gns3.teachable.com/courses/enrolled/423415',
        'only_matching': True,
    }, {
        'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini',
        'only_matching': True,
    }, {
        'url': 'teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject any URL that TeachableIE accepts; otherwise use the default check."""
        if TeachableIE.suitable(url):
            return False
        return super().suitable(url)

    def _iter_lecture_entries(self, webpage, url_base, prefixed):
        """Yield one url_result per ``section-item`` ``<li>`` that looks like a video."""
        for item in re.finditer(
                r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)',
                webpage):
            li = item.group('li')
            # Keep items that show either the play icon or an mm:ss style
            # duration; everything else is skipped.
            if not ('fa-youtube-play' in li or re.search(r'\d{1,2}:\d{2}', li)):
                continue
            lecture_url = self._search_regex(
                r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li,
                'lecture url', default=None, group='url')
            if not lecture_url:
                continue
            lecture_id = self._search_regex(
                r'/lectures/(\d+)', lecture_url, 'lecture id', default=None)
            title = self._html_search_regex(
                r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', li,
                'title', default=None)
            entry_url = urljoin(url_base, lecture_url)
            if prefixed:
                # Carry the prefix over so TeachableIE also matches hosts
                # that are not listed in _SITES.
                entry_url = self._URL_PREFIX + entry_url
            yield self.url_result(
                entry_url,
                ie=TeachableIE.ie_key(), video_id=lecture_id,
                video_title=clean_html(title))

    def _real_extract(self, url):
        """Build the course playlist from the lecture links on the course page."""
        match = self._match_valid_url(url)
        site = match.group('site') or match.group('site_t')
        course_id = match.group('id')

        self._login(site)

        prefixed = url.startswith(self._URL_PREFIX)
        if prefixed:
            url = url[len(self._URL_PREFIX):]

        webpage = self._download_webpage(url, course_id)

        url_base = f'https://{site}/'

        # Materialise eagerly so the entries are built before the title lookup.
        entries = list(self._iter_lecture_entries(webpage, url_base, prefixed))

        title_patterns = (
            r'(?s)<img[^>]+class=["\']course-image[^>]+>\s*<h\d>(.+?)</h',
            r'(?s)<h\d[^>]+class=["\']course-title[^>]+>(.+?)</h',
        )
        course_title = self._html_search_regex(
            title_patterns, webpage, 'course title', fatal=False)

        return self.playlist_result(entries, course_id, course_title)
251 | ||
252 | self._login(site) | |
253 | ||
254 | prefixed = url.startswith(self._URL_PREFIX) | |
255 | if prefixed: | |
256 | prefix = self._URL_PREFIX | |
257 | url = url[len(prefix):] | |
7fc60f4e S |
258 | |
259 | webpage = self._download_webpage(url, course_id) | |
260 | ||
add96eb9 | 261 | url_base = f'https://{site}/' |
7fc60f4e S |
262 | |
263 | entries = [] | |
264 | ||
265 | for mobj in re.finditer( | |
266 | r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)', | |
267 | webpage): | |
268 | li = mobj.group('li') | |
29f7c58a | 269 | if 'fa-youtube-play' not in li and not re.search(r'\d{1,2}:\d{2}', li): |
7fc60f4e S |
270 | continue |
271 | lecture_url = self._search_regex( | |
272 | r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li, | |
273 | 'lecture url', default=None, group='url') | |
274 | if not lecture_url: | |
275 | continue | |
276 | lecture_id = self._search_regex( | |
277 | r'/lectures/(\d+)', lecture_url, 'lecture id', default=None) | |
278 | title = self._html_search_regex( | |
279 | r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', li, | |
280 | 'title', default=None) | |
5ee7ae5c S |
281 | entry_url = urljoin(url_base, lecture_url) |
282 | if prefixed: | |
283 | entry_url = self._URL_PREFIX + entry_url | |
7fc60f4e S |
284 | entries.append( |
285 | self.url_result( | |
5ee7ae5c S |
286 | entry_url, |
287 | ie=TeachableIE.ie_key(), video_id=lecture_id, | |
7fc60f4e S |
288 | video_title=clean_html(title))) |
289 | ||
290 | course_title = self._html_search_regex( | |
291 | (r'(?s)<img[^>]+class=["\']course-image[^>]+>\s*<h\d>(.+?)</h', | |
292 | r'(?s)<h\d[^>]+class=["\']course-title[^>]+>(.+?)</h'), | |
293 | webpage, 'course title', fatal=False) | |
294 | ||
295 | return self.playlist_result(entries, course_id, course_title) |