]>
Commit | Line | Data |
---|---|---|
32d687f5 | 1 | # encoding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
32d687f5 | 5 | |
6 | from .common import InfoExtractor | |
4fcaa4f4 | 7 | from .brightcove import BrightcoveLegacyIE |
32d687f5 | 8 | |
32d687f5 | 9 | from ..utils import ( |
10 | ExtractorError, | |
5c2266df | 11 | sanitized_Request, |
32d687f5 | 12 | smuggle_url, |
13 | std_headers, | |
8773f315 | 14 | urlencode_postdata, |
32d687f5 | 15 | ) |
16 | ||
17 | ||
class SafariBaseIE(InfoExtractor):
    """Base class holding the login logic shared by the Safari extractors.

    Subclasses inherit ``_real_initialize``, which performs a form-based
    login against ``_LOGIN_URL`` the first time any of them is initialized.
    """

    _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
    # Markup that is only expected in the response when the login succeeded.
    _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
    _NETRC_MACHINE = 'safari'

    _API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
    _API_FORMAT = 'json'

    # Stored on SafariBaseIE itself (not on the instance) so that the flag is
    # shared by every subclass once one of them has logged in.
    LOGGED_IN = False

    def _real_initialize(self):
        # We only need to log in once for courses or individual videos
        if not self.LOGGED_IN:
            self._login()
            SafariBaseIE.LOGGED_IN = True

    def _login(self):
        """Log in with the configured credentials.

        Downloads the login form, extracts its CSRF token and posts the
        credentials back to ``_LOGIN_URL``.

        Raises:
            ExtractorError: if the response to the login request does not
                match ``_SUCCESSFUL_LOGIN_REGEX``.  When no username is
                configured, ``raise_login_required`` is called instead.
        """
        (username, password) = self._get_login_info()
        if username is None:
            self.raise_login_required('safaribooksonline.com account is required')

        # Work on a copy: std_headers is a module-level dict shared with the
        # rest of the program, so adding the Referer to it directly would
        # leak this login URL into every other user of that dict.
        headers = std_headers.copy()
        if 'Referer' not in headers:
            headers['Referer'] = self._LOGIN_URL

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            'Downloading login form')

        csrf = self._html_search_regex(
            r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
            login_page, 'csrf token')

        login_form = {
            'csrfmiddlewaretoken': csrf,
            'email': username,
            'password1': password,
            'login': 'Sign In',
            'next': '',
        }

        request = sanitized_Request(
            self._LOGIN_URL, urlencode_postdata(login_form), headers=headers)
        login_page = self._download_webpage(
            request, None, 'Logging in as %s' % username)

        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
            raise ExtractorError(
                'Login failed; make sure your credentials are correct and try again.',
                expected=True)

        self.to_screen('Login successful')
70 | ||
71 | ||
class SafariIE(SafariBaseIE):
    """Extractor for a single safaribooksonline.com video part page."""

    IE_NAME = 'safari'
    IE_DESC = 'safaribooksonline.com online video'
    # Verbose pattern: accepts both the library view and the API form of a
    # part URL and captures the course id and the "partNN" page name.
    _VALID_URL = r'''(?x)https?://
                            (?:www\.)?safaribooksonline\.com/
                            (?:
                                library/view/[^/]+|
                                api/v1/book
                            )/
                            (?P<course_id>[^/]+)/
                            (?:chapter(?:-content)?/)?
                            (?P<part>part\d+)\.html
    '''

    _TESTS = [{
        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
        'md5': '5b0c4cc1b3c1ba15dda7344085aa5592',
        'info_dict': {
            'id': '2842601850001',
            'ext': 'mp4',
            'title': 'Introduction',
        },
        'skip': 'Requires safaribooksonline account credentials',
    }, {
        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
        'only_matching': True,
    }, {
        # non-digits in course id
        'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a part page to the Brightcove video embedded in it.

        The chapter content is fetched through the API endpoint built from
        ``_API_BASE``; the Brightcove URL found in it is handed on to the
        'BrightcoveLegacy' extractor with the original page URL smuggled in
        as the Referer.

        Raises:
            ExtractorError: if no Brightcove URL is found in the page.
        """
        course_id, part = re.match(self._VALID_URL, url).group('course_id', 'part')

        chapter_url = '%s/%s/chapter-content/%s.html' % (
            self._API_BASE, course_id, part)
        webpage = self._download_webpage(chapter_url, part)

        bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
        if bc_url:
            return self.url_result(
                smuggle_url(bc_url, {'Referer': url}), 'BrightcoveLegacy')

        raise ExtractorError(
            'Could not extract Brightcove URL from %s' % url, expected=True)
32d687f5 | 118 | |
119 | ||
class SafariCourseIE(SafariBaseIE):
    """Extractor that turns a safaribooksonline.com course into a playlist."""

    IE_NAME = 'safari:course'
    IE_DESC = 'safaribooksonline.com online courses'

    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>[^/]+)/?(?:[#?]|$)'

    _TESTS = [{
        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
        'info_dict': {
            'id': '9780133392838',
            'title': 'Hadoop Fundamentals LiveLessons',
        },
        'playlist_count': 22,
        'skip': 'Requires safaribooksonline account credentials',
    }, {
        'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist from the course description returned by the API.

        Each item of the JSON ``chapters`` list is passed to ``url_result``
        as a URL for the 'Safari' extractor (presumably chapter page URLs).

        Raises:
            ExtractorError: if the course JSON has no ``chapters`` key.
        """
        course_id = self._match_id(url)

        api_url = '%s/%s/?override_format=%s' % (
            self._API_BASE, course_id, self._API_FORMAT)
        course_json = self._download_json(
            api_url, course_id, 'Downloading course JSON')

        if 'chapters' not in course_json:
            raise ExtractorError(
                'No chapters found for course %s' % course_id, expected=True)

        entries = []
        for chapter_url in course_json['chapters']:
            entries.append(self.url_result(chapter_url, 'Safari'))

        return self.playlist_result(entries, course_id, course_json['title'])