]>
Commit | Line | Data |
---|---|---|
dcdb292f | 1 | # coding: utf-8 |
32d687f5 | 2 | from __future__ import unicode_literals |
3 | ||
4 | import re | |
32d687f5 | 5 | |
6 | from .common import InfoExtractor | |
32d687f5 | 7 | |
32d687f5 | 8 | from ..utils import ( |
9 | ExtractorError, | |
5c2266df | 10 | sanitized_Request, |
32d687f5 | 11 | std_headers, |
8773f315 | 12 | urlencode_postdata, |
bcb668de | 13 | update_url_query, |
32d687f5 | 14 | ) |
15 | ||
16 | ||
class SafariBaseIE(InfoExtractor):
    """Base extractor holding the login flow and API constants for safaribooksonline.com."""

    _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
    _NETRC_MACHINE = 'safari'

    _API_BASE = 'https://www.safaribooksonline.com/api/v1'
    _API_FORMAT = 'json'

    # Flipped to True on the instance by _login() once a signed-in page is seen.
    LOGGED_IN = False

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Sign in with the configured credentials, if any.

        Does nothing when no username is available.  Sets ``self.LOGGED_IN``
        when either the login page or the response to the submitted login
        form shows a signed-in session.

        Raises:
            ExtractorError: the page returned after submitting the login form
                does not look signed in.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        # Work on a copy so the shared default headers are never modified.
        headers = std_headers.copy()
        headers.setdefault('Referer', self._LOGIN_URL)

        def is_logged(webpage):
            # A logout link or a "Sign Out" label marks a signed-in page.
            for marker in (r'href=["\']/accounts/logout/', r'>Sign Out<'):
                if re.search(marker, webpage):
                    return True
            return False

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login form', headers=headers)

        # The session may already be signed in; nothing to submit then.
        if is_logged(login_page):
            self.LOGGED_IN = True
            return

        csrf_token = self._html_search_regex(
            r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
            login_page, 'csrf token')

        form_data = {
            'csrfmiddlewaretoken': csrf_token,
            'email': username,
            'password1': password,
            'login': 'Sign In',
            'next': '',
        }

        login_request = sanitized_Request(
            self._LOGIN_URL, urlencode_postdata(form_data), headers=headers)
        response_page = self._download_webpage(
            login_request, None, 'Logging in')

        if not is_logged(response_page):
            raise ExtractorError(
                'Login failed; make sure your credentials are correct and try again.',
                expected=True)

        self.LOGGED_IN = True
e9c8999e | 72 | |
32d687f5 | 73 | |
class SafariIE(SafariBaseIE):
    """Extractor for a single safaribooksonline.com video page.

    Accepts ``library/view/.../<course_id>/<part>.html`` URLs and
    ``videos/.../<reference_id>`` URLs and hands the video over to the
    Kaltura extractor.
    """

    IE_NAME = 'safari'
    IE_DESC = 'safaribooksonline.com online video'
    _VALID_URL = r'''(?x)
                        https?://
                            (?:www\.)?safaribooksonline\.com/
                            (?:
                                library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
                                videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
                            )
                    '''

    _TESTS = [{
        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
        'md5': 'dcc5a425e79f2564148652616af1f2a3',
        'info_dict': {
            'id': '0_qbqx90ic',
            'ext': 'mp4',
            'title': 'Introduction to Hadoop Fundamentals LiveLessons',
            'timestamp': 1437758058,
            'upload_date': '20150724',
            'uploader_id': 'stork',
        },
    }, {
        # non-digits in course id
        'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
        'only_matching': True,
    }, {
        'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html',
        'only_matching': True,
    }, {
        'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',
        'only_matching': True,
    }]

    # Defaults used when the URL already carries the reference id, or when the
    # page does not expose its own partner / uiconf ids.
    _PARTNER_ID = '1926081'
    _UICONF_ID = '29375172'

    def _real_extract(self, url):
        """Build a Kaltura embed URL result for the video behind ``url``.

        For ``videos/`` URLs the reference id is taken from the URL and the
        default partner / uiconf ids are used.  For ``library/view/`` URLs the
        page is downloaded and the ids are read from the final URL or from
        ``data-*`` attributes in the markup.  When logged in, a Kaltura
        session token is requested (non-fatally) and added to the query.
        """
        mobj = re.match(self._VALID_URL, url)

        reference_id = mobj.group('reference_id')
        if reference_id:
            video_id = reference_id
            partner_id = self._PARTNER_ID
            ui_id = self._UICONF_ID
        else:
            video_id = '%s-%s' % (mobj.group('course_id'), mobj.group('part'))

            webpage, urlh = self._download_webpage_handle(url, video_id)

            # The final URL may differ from the requested one and may not
            # match _VALID_URL at all; re.match() then returns None, so guard
            # it instead of crashing with AttributeError and fall back to the
            # page markup below.
            final_mobj = re.match(self._VALID_URL, urlh.geturl())
            reference_id = final_mobj.group('reference_id') if final_mobj else None
            if not reference_id:
                reference_id = self._search_regex(
                    r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
                    webpage, 'kaltura reference id', group='id')
            partner_id = self._search_regex(
                r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
                webpage, 'kaltura widget id', default=self._PARTNER_ID,
                group='id')
            ui_id = self._search_regex(
                r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
                webpage, 'kaltura uiconf id', default=self._UICONF_ID,
                group='id')

        query = {
            'wid': '_%s' % partner_id,
            'uiconf_id': ui_id,
            'flashvars[referenceId]': reference_id,
        }

        if self.LOGGED_IN:
            # Best effort: extraction continues without a session token when
            # this request fails (fatal=False).
            kaltura_session = self._download_json(
                '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id),
                video_id, 'Downloading kaltura session JSON',
                'Unable to download kaltura session JSON', fatal=False)
            if kaltura_session:
                session = kaltura_session.get('session')
                if session:
                    query['flashvars[ks]'] = session

        return self.url_result(update_url_query(
            'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query),
            'Kaltura')
32d687f5 | 159 | |
160 | ||
class SafariApiIE(SafariBaseIE):
    """Extractor for API chapter URLs; forwards to SafariIE via the chapter's ``web_url``."""

    IE_NAME = 'safari:api'
    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'

    _TESTS = [{
        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
        'only_matching': True,
    }, {
        'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the chapter JSON and return a URL result for its ``web_url``."""
        course_id, part_name = re.match(
            self._VALID_URL, url).group('course_id', 'part')
        display_id = '%s/%s' % (course_id, part_name)

        part_json = self._download_json(
            url, display_id, 'Downloading part JSON')

        return self.url_result(part_json['web_url'], SafariIE.ie_key())
179 | ||
180 | ||
class SafariCourseIE(SafariBaseIE):
    """Extractor turning a safaribooksonline.com course/book URL into a playlist of chapters."""

    IE_NAME = 'safari:course'
    IE_DESC = 'safaribooksonline.com online courses'

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:www\.)?safaribooksonline\.com/
                            (?:
                                library/view/[^/]+|
                                api/v1/book|
                                videos/[^/]+
                            )|
                            techbus\.safaribooksonline\.com
                        )
                        /(?P<id>[^/]+)
                    '''

    _TESTS = [{
        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
        'info_dict': {
            'id': '9780133392838',
            'title': 'Hadoop Fundamentals LiveLessons',
        },
        'playlist_count': 22,
        'skip': 'Requires safaribooksonline account credentials',
    }, {
        'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
        'only_matching': True,
    }, {
        'url': 'http://techbus.safaribooksonline.com/9780134426365',
        'only_matching': True,
    }, {
        'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Claim ``url`` only when neither SafariIE nor SafariApiIE accepts it."""
        if SafariIE.suitable(url) or SafariApiIE.suitable(url):
            return False
        return super(SafariCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist with one SafariApiIE entry per chapter of the course.

        Raises:
            ExtractorError: the course JSON has no ``chapters`` key.
        """
        course_id = self._match_id(url)

        api_url = '%s/book/%s/?override_format=%s' % (
            self._API_BASE, course_id, self._API_FORMAT)
        course_json = self._download_json(
            api_url, course_id, 'Downloading course JSON')

        if 'chapters' not in course_json:
            raise ExtractorError(
                'No chapters found for course %s' % course_id, expected=True)

        # Each chapter value is passed on as a URL for SafariApiIE to resolve.
        entries = []
        for chapter_url in course_json['chapters']:
            entries.append(self.url_result(chapter_url, SafariApiIE.ie_key()))

        return self.playlist_result(entries, course_id, course_json['title'])