]>
Commit | Line | Data |
---|---|---|
7fc60f4e S |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from .wistia import WistiaIE | |
7 | from ..compat import compat_str | |
8 | from ..utils import ( | |
9 | clean_html, | |
10 | ExtractorError, | |
11 | get_element_by_class, | |
12 | urlencode_postdata, | |
13 | urljoin, | |
14 | ) | |
15 | ||
16 | ||
class UpskillBaseIE(InfoExtractor):
    """Common base for the Upskill extractors; handles signing in."""

    _LOGIN_URL = 'http://upskillcourses.com/sign_in'
    _NETRC_MACHINE = 'upskill'

    def _real_initialize(self):
        """Run the login routine when the extractor is initialized."""
        self._login()

    def _login(self):
        """Submit the sign-in form using the configured credentials.

        Returns without doing anything when no username is available, and
        returns normally when the response to the form submission matches
        one of the signed-in markers below.

        Raises:
            ExtractorError: when none of the markers match. If the response
                contains an element of class ``alert``, its cleaned text is
                included in the message and the error is marked as expected.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        signin_page, handle = self._download_webpage_handle(
            self._LOGIN_URL, None, 'Downloading login page')

        # URL reported by the response handle; used as the fallback form
        # target, as the base for relative form actions and as the Referer.
        login_url = compat_str(handle.geturl())

        # Start from the fields returned by _hidden_inputs and add the
        # credentials on top.
        form_fields = self._hidden_inputs(signin_page)
        form_fields['user[email]'] = username
        form_fields['user[password]'] = password

        post_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>(?:(?!\1).)+)\1', signin_page,
            'post url', default=login_url, group='url')

        if not post_url.startswith('http'):
            post_url = urljoin(login_url, post_url)

        request_headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': login_url,
        }
        response = self._download_webpage(
            post_url, None, 'Logging in',
            data=urlencode_postdata(form_fields),
            headers=request_headers)

        # Any one of these in the response is treated as a successful login.
        signed_in_markers = (
            r'class=["\']user-signout',
            r'<a[^>]+\bhref=["\']/sign_out',
            r'>\s*Log out\s*<',
        )
        for marker in signed_in_markers:
            if re.search(marker, response):
                return

        alert = get_element_by_class('alert', response)
        if alert is None:
            raise ExtractorError('Unable to log in')

        raise ExtractorError(
            'Unable to login: %s' % clean_html(alert), expected=True)
69 | ||
70 | ||
class UpskillIE(UpskillBaseIE):
    """Extractor for a single Upskill lecture page.

    The lecture page is expected to embed a Wistia video; the actual media
    extraction is delegated to WistiaIE through a ``url_transparent`` result.
    """

    _VALID_URL = r'https?://(?:www\.)?upskillcourses\.com/courses/[^/]+/lectures/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
        'info_dict': {
            'id': 'uzw6zw58or',
            'ext': 'mp4',
            'title': 'Welcome to the Course!',
            'description': 'md5:8d66c13403783370af62ca97a7357bdd',
            'duration': 138.763,
            'timestamp': 1479846621,
            'upload_date': '20161122',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://upskillcourses.com/courses/119763/lectures/1747100',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at the Wistia embed.

        Raises:
            ExtractorError: via ``raise_login_required`` when the page
                matches one of the locked-lecture markers, or directly when
                no Wistia URL is found on an unlocked page.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        wistia_url = WistiaIE._extract_url(webpage)
        if not wistia_url:
            if any(re.search(p, webpage) for p in (
                    r'class=["\']lecture-contents-locked',
                    r'>\s*Lecture contents locked',
                    r'id=["\']lecture-locked')):
                self.raise_login_required('Lecture contents locked')
            # Fail explicitly here: falling through would hand back a result
            # with 'url': None and surface as an unrelated error later on.
            raise ExtractorError('Unable to find video URL')

        # May be None; the title is optional for this result.
        title = self._og_search_title(webpage, default=None)

        return {
            '_type': 'url_transparent',
            'url': wistia_url,
            'ie_key': WistiaIE.ie_key(),
            'title': title,
        }
114 | ||
115 | ||
class UpskillCourseIE(UpskillBaseIE):
    """Extractor that turns an Upskill course page into a playlist of lectures."""

    _VALID_URL = r'https?://(?:www\.)?upskillcourses\.com/courses/(?:enrolled/)?(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://upskillcourses.com/courses/essential-web-developer-course/',
        'info_dict': {
            'id': '119763',
            'title': 'The Essential Web Developer Course (Free)',
        },
        'playlist_count': 192,
    }, {
        'url': 'http://upskillcourses.com/courses/119763/',
        'only_matching': True,
    }, {
        'url': 'http://upskillcourses.com/courses/enrolled/119763',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline any URL that UpskillIE accepts; otherwise defer to the parent check."""
        if UpskillIE.suitable(url):
            return False
        return super(UpskillCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build a playlist result from the section items of a course page.

        Only list items containing ``fa-youtube-play`` and a link are turned
        into entries; each entry is a URL result handled by UpskillIE.
        """
        course_id = self._match_id(url)

        webpage = self._download_webpage(url, course_id)

        # Use the numeric id from the page when present, else keep the URL id.
        course_id = self._search_regex(
            r'data-course-id=["\'](\d+)', webpage, 'course id',
            default=course_id)

        section_item_re = r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)'
        base_url = 'http://upskillcourses.com/'

        entries = []
        for item_match in re.finditer(section_item_re, webpage):
            item_html = item_match.group('li')

            # Items without this marker are skipped.
            if 'fa-youtube-play' not in item_html:
                continue

            href = self._search_regex(
                r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', item_html,
                'lecture url', default=None, group='url')
            if not href:
                continue

            lecture_id = self._search_regex(
                r'/lectures/(\d+)', href, 'lecture id', default=None)
            lecture_name = self._html_search_regex(
                r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', item_html,
                'title', default=None)

            entry = self.url_result(
                urljoin(base_url, href),
                ie=UpskillIE.ie_key(), video_id=lecture_id,
                video_title=clean_html(lecture_name))
            entries.append(entry)

        title_patterns = (
            r'(?s)<img[^>]+class=["\']course-image[^>]+>\s*<h\d>(.+?)</h',
            r'(?s)<h\d[^>]+class=["\']course-title[^>]+>(.+?)</h',
        )
        course_title = self._html_search_regex(
            title_patterns, webpage, 'course title', fatal=False)

        return self.playlist_result(entries, course_id, course_title)