]>
Commit | Line | Data |
---|---|---|
32d687f5 | 1 | # encoding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
32d687f5 | 5 | |
6 | from .common import InfoExtractor | |
32d687f5 | 7 | |
32d687f5 | 8 | from ..utils import ( |
9 | ExtractorError, | |
5c2266df | 10 | sanitized_Request, |
32d687f5 | 11 | std_headers, |
8773f315 | 12 | urlencode_postdata, |
bcb668de | 13 | update_url_query, |
32d687f5 | 14 | ) |
15 | ||
16 | ||
class SafariBaseIE(InfoExtractor):
    """Base extractor holding the login logic shared by the Safari extractors."""

    _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
    _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
    _NETRC_MACHINE = 'safari'

    _API_BASE = 'https://www.safaribooksonline.com/api/v1'
    _API_FORMAT = 'json'

    # Flipped to True on SafariBaseIE itself after a successful login, so the
    # flag is visible through every subclass.
    LOGGED_IN = False

    def _real_initialize(self):
        """Attempt a login when the extractor is initialized."""
        self._login()

    def _login(self):
        """Log in with the configured credentials, if any.

        Does nothing when a login already succeeded or when no username is
        available. Raises ExtractorError when the page returned after posting
        the form does not match _SUCCESSFUL_LOGIN_REGEX.
        """
        # A single login is enough for courses and individual videos alike
        if self.LOGGED_IN:
            return

        username, password = self._get_login_info()
        if username is None:
            return

        request_headers = std_headers.copy()
        request_headers.setdefault('Referer', self._LOGIN_URL)

        form_page = self._download_webpage(
            sanitized_Request(self._LOGIN_URL, headers=request_headers),
            None, 'Downloading login form')

        # The token is read from the login form and sent back with the
        # credentials.
        csrf_token = self._html_search_regex(
            r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
            form_page, 'csrf token')

        post_data = urlencode_postdata({
            'csrfmiddlewaretoken': csrf_token,
            'email': username,
            'password1': password,
            'login': 'Sign In',
            'next': '',
        })
        login_request = sanitized_Request(
            self._LOGIN_URL, post_data, headers=request_headers)
        result_page = self._download_webpage(
            login_request, None, 'Logging in as %s' % username)

        if not re.search(self._SUCCESSFUL_LOGIN_REGEX, result_page):
            raise ExtractorError(
                'Login failed; make sure your credentials are correct and try again.',
                expected=True)

        SafariBaseIE.LOGGED_IN = True

        self.to_screen('Login successful')
73 | ||
74 | ||
class SafariIE(SafariBaseIE):
    """Extractor for a single safaribooksonline.com video page."""

    IE_NAME = 'safari'
    IE_DESC = 'safaribooksonline.com online video'
    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?#&]+)\.html'

    _TESTS = [{
        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
        'md5': 'dcc5a425e79f2564148652616af1f2a3',
        'info_dict': {
            'id': '0_qbqx90ic',
            'ext': 'mp4',
            'title': 'Introduction to Hadoop Fundamentals LiveLessons',
            'timestamp': 1437758058,
            'upload_date': '20150724',
            'uploader_id': 'stork',
        },
    }, {
        # non-digits in course id
        'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
        'only_matching': True,
    }, {
        'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a Kaltura embed URL from the page and delegate to 'Kaltura'.

        The reference, partner and uiconf ids are read from data-* attributes
        of the downloaded page. When logged in, a Kaltura session is requested
        from the API and, if one is returned, added to the embed query.
        """
        match = re.match(self._VALID_URL, url)
        video_id = '%s/%s' % match.group('course_id', 'part')

        webpage = self._download_webpage(url, video_id)

        # Each pattern accepts either quote style around the attribute value.
        reference_id = self._search_regex(
            r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
            webpage, 'kaltura reference id', group='id')
        partner_id = self._search_regex(
            r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
            webpage, 'kaltura widget id', group='id')
        ui_id = self._search_regex(
            r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
            webpage, 'kaltura uiconf id', group='id')

        query = {
            'wid': '_%s' % partner_id,
            'uiconf_id': ui_id,
            'flashvars[referenceId]': reference_id,
        }

        # The session request is non-fatal: on failure the embed URL is simply
        # built without a session.
        session_json = None
        if self.LOGGED_IN:
            session_url = '%s/player/kaltura_session/?reference_id=%s' % (
                self._API_BASE, reference_id)
            session_json = self._download_json(
                session_url, video_id,
                'Downloading kaltura session JSON',
                'Unable to download kaltura session JSON', fatal=False)

        session = session_json.get('session') if session_json else None
        if session:
            query['flashvars[ks]'] = session

        embed_url = update_url_query(
            'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php',
            query)
        return self.url_result(embed_url, 'Kaltura')
32d687f5 | 134 | |
135 | ||
class SafariApiIE(SafariBaseIE):
    """Extractor for API chapter URLs; resolves them to the matching web page."""

    IE_NAME = 'safari:api'
    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'

    _TESTS = [{
        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
        'only_matching': True,
    }, {
        'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the part JSON and hand its 'web_url' over to SafariIE."""
        match = re.match(self._VALID_URL, url)
        display_id = '%s/%s' % match.group('course_id', 'part')

        part_json = self._download_json(url, display_id, 'Downloading part JSON')

        web_url = part_json['web_url']
        return self.url_result(web_url, SafariIE.ie_key())
154 | ||
155 | ||
class SafariCourseIE(SafariBaseIE):
    """Extractor turning a course (book) URL into a playlist of its chapters."""

    IE_NAME = 'safari:course'
    IE_DESC = 'safaribooksonline.com online courses'

    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>[^/]+)/?(?:[#?]|$)'

    _TESTS = [{
        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
        'info_dict': {
            'id': '9780133392838',
            'title': 'Hadoop Fundamentals LiveLessons',
        },
        'playlist_count': 22,
        'skip': 'Requires safaribooksonline account credentials',
    }, {
        'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the course JSON and return a playlist of its chapter URLs.

        Raises ExtractorError when the JSON has no 'chapters' key.
        """
        course_id = self._match_id(url)

        api_url = '%s/book/%s/?override_format=%s' % (
            self._API_BASE, course_id, self._API_FORMAT)
        course_json = self._download_json(
            api_url, course_id, 'Downloading course JSON')

        if 'chapters' not in course_json:
            raise ExtractorError(
                'No chapters found for course %s' % course_id, expected=True)

        # Every chapter entry is passed on as a URL for SafariApiIE to resolve.
        entries = []
        for chapter_url in course_json['chapters']:
            entries.append(self.url_result(chapter_url, SafariApiIE.ie_key()))

        return self.playlist_result(entries, course_id, course_json['title'])