]>
Commit | Line | Data |
---|---|---|
32d687f5 | 1 | # encoding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
32d687f5 | 5 | |
6 | from .common import InfoExtractor | |
32d687f5 | 7 | |
32d687f5 | 8 | from ..utils import ( |
9 | ExtractorError, | |
5c2266df | 10 | sanitized_Request, |
32d687f5 | 11 | std_headers, |
8773f315 | 12 | urlencode_postdata, |
bcb668de | 13 | update_url_query, |
32d687f5 | 14 | ) |
15 | ||
16 | ||
class SafariBaseIE(InfoExtractor):
    """Common base for the safaribooksonline.com extractors.

    Carries the site/API constants and performs the account login that is
    triggered from ``_real_initialize``.
    """

    _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
    _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
    _NETRC_MACHINE = 'safari'

    _API_BASE = 'https://www.safaribooksonline.com/api/v1'
    _API_FORMAT = 'json'

    # Flipped to True on SafariBaseIE itself once a login has succeeded.
    LOGGED_IN = False

    def _real_initialize(self):
        """Attempt to log in when the extractor is initialised."""
        self._login()

    def _login(self):
        """Log in with the configured credentials, at most once.

        Returns without doing anything when a login already succeeded or
        when no username is available. Raises ExtractorError (expected)
        when the page returned after posting the form does not match
        ``_SUCCESSFUL_LOGIN_REGEX``.
        """
        # One successful login is enough for courses and single videos alike.
        if self.LOGGED_IN:
            return

        username, password = self._get_login_info()
        if username is None:
            return

        # The same headers are sent for both the form fetch and the POST.
        request_headers = std_headers.copy()
        request_headers.setdefault('Referer', self._LOGIN_URL)

        form_page = self._download_webpage(
            sanitized_Request(self._LOGIN_URL, headers=request_headers),
            None, 'Downloading login form')

        # The CSRF token embedded in the form has to be echoed back.
        csrf_token = self._html_search_regex(
            r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
            form_page, 'csrf token')

        post_data = urlencode_postdata({
            'csrfmiddlewaretoken': csrf_token,
            'email': username,
            'password1': password,
            'login': 'Sign In',
            'next': '',
        })
        login_request = sanitized_Request(
            self._LOGIN_URL, post_data, headers=request_headers)
        result_page = self._download_webpage(
            login_request, None, 'Logging in as %s' % username)

        # A "Sign Out" link in the response is the marker of success.
        if not re.search(self._SUCCESSFUL_LOGIN_REGEX, result_page):
            raise ExtractorError(
                'Login failed; make sure your credentials are correct and try again.',
                expected=True)

        SafariBaseIE.LOGGED_IN = True

        self.to_screen('Login successful')
73 | ||
74 | ||
class SafariIE(SafariBaseIE):
    """Extractor for a single safaribooksonline.com video part page.

    Reads the Kaltura player attributes from the page and delegates the
    actual extraction to the Kaltura extractor through an embed-frame URL.
    """

    IE_NAME = 'safari'
    IE_DESC = 'safaribooksonline.com online video'
    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>part\d+)\.html'

    _TESTS = [{
        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
        'md5': 'dcc5a425e79f2564148652616af1f2a3',
        'info_dict': {
            'id': '0_qbqx90ic',
            'ext': 'mp4',
            'title': 'Introduction to Hadoop Fundamentals LiveLessons',
            'timestamp': 1437758058,
            'upload_date': '20150724',
            'uploader_id': 'stork',
        },
    }, {
        # non-digits in course id
        'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result pointing the Kaltura extractor at this part.

        The display id used for progress messages is ``<course_id>/<part>``
        taken from the URL. When a login succeeded earlier, a Kaltura
        session token is requested from the site API (non-fatally) and, if
        one is returned, passed along as ``flashvars[ks]``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = '%s/%s' % (mobj.group('course_id'), mobj.group('part'))

        webpage = self._download_webpage(url, video_id)
        reference_id = self._search_regex(
            r'data-reference-id=(["\'])(?P<id>.+?)\1',
            webpage, 'kaltura reference id', group='id')
        partner_id = self._search_regex(
            r'data-partner-id=(["\'])(?P<id>.+?)\1',
            webpage, 'kaltura widget id', group='id')
        ui_id = self._search_regex(
            r'data-ui-id=(["\'])(?P<id>.+?)\1',
            webpage, 'kaltura uiconf id', group='id')

        query = {
            'wid': '_%s' % partner_id,
            'uiconf_id': ui_id,
            'flashvars[referenceId]': reference_id,
        }

        if self.LOGGED_IN:
            # reference_id is scraped from the page, so let update_url_query
            # encode it instead of pasting it raw into the query string.
            session_url = update_url_query(
                '%s/player/kaltura_session/' % self._API_BASE,
                {'reference_id': reference_id})
            kaltura_session = self._download_json(
                session_url,
                video_id, 'Downloading kaltura session JSON',
                'Unable to download kaltura session JSON', fatal=False)
            # fatal=False: a failed download must not abort extraction.
            if kaltura_session:
                session = kaltura_session.get('session')
                if session:
                    query['flashvars[ks]'] = session

        return self.url_result(update_url_query(
            'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query),
            'Kaltura')
32d687f5 | 131 | |
132 | ||
3aec7176 S |
class SafariApiIE(SafariBaseIE):
    """Extractor for API chapter URLs; redirects to the matching web page."""

    IE_NAME = 'safari:api'
    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>part\d+)\.html'

    _TEST = {
        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Download the part JSON and hand its ``web_url`` to SafariIE."""
        course_id, part_name = re.match(
            self._VALID_URL, url).group('course_id', 'part')
        display_id = '%s/%s' % (course_id, part_name)

        part_json = self._download_json(
            url, display_id, 'Downloading part JSON')

        web_url = part_json['web_url']
        return self.url_result(web_url, SafariIE.ie_key())
148 | ||
149 | ||
class SafariCourseIE(SafariBaseIE):
    """Extractor for a whole safaribooksonline.com course (playlist).

    Accepts both the library page URL and the API book URL of a course and
    produces one playlist entry per chapter listed in the course JSON.
    """

    IE_NAME = 'safari:course'
    IE_DESC = 'safaribooksonline.com online courses'

    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>[^/]+)/?(?:[#?]|$)'

    _TESTS = [{
        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
        'info_dict': {
            'id': '9780133392838',
            'title': 'Hadoop Fundamentals LiveLessons',
        },
        'playlist_count': 22,
        'skip': 'Requires safaribooksonline account credentials',
    }, {
        'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist from the chapters of the course behind ``url``.

        Raises ExtractorError (expected) when the course JSON has no
        ``chapters`` key or when its value is empty/null. A missing
        ``title`` is passed on as None instead of raising KeyError.
        """
        course_id = self._match_id(url)

        course_json = self._download_json(
            '%s/book/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
            course_id, 'Downloading course JSON')

        # Treat an absent, null or empty chapter list the same way: there is
        # nothing to extract, so report it rather than failing on iteration.
        chapters = course_json.get('chapters')
        if not chapters:
            raise ExtractorError(
                'No chapters found for course %s' % course_id, expected=True)

        # Each chapter value is handed to SafariApiIE as a URL.
        entries = [
            self.url_result(chapter, SafariApiIE.ie_key())
            for chapter in chapters]

        course_title = course_json.get('title')

        return self.playlist_result(entries, course_id, course_title)