]>
Commit | Line | Data |
---|---|---|
32d687f5 | 1 | # encoding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
32d687f5 | 5 | |
6 | from .common import InfoExtractor | |
7 | from .brightcove import BrightcoveIE | |
8 | ||
9 | from ..compat import ( | |
10 | compat_urllib_parse, | |
11 | compat_urllib_request, | |
12 | ) | |
13 | from ..utils import ( | |
14 | ExtractorError, | |
15 | smuggle_url, | |
16 | std_headers, | |
17 | ) | |
18 | ||
19 | ||
class SafariBaseIE(InfoExtractor):
    """Common base for the safaribooksonline.com extractors.

    Holds the login/API constants and performs the sign-in.  The
    ``LOGGED_IN`` flag is stored on ``SafariBaseIE`` itself, so once one
    extractor instance has logged in, the others skip the login step.
    """

    _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
    _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
    _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com'
    _NETRC_MACHINE = 'safari'

    _API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
    _API_FORMAT = 'json'

    LOGGED_IN = False

    def _real_initialize(self):
        """Log in unless a previous extractor instance already did."""
        # We only need to log in once for courses or individual videos
        if not self.LOGGED_IN:
            self._login()
            SafariBaseIE.LOGGED_IN = True

    def _login(self):
        """Submit the login form and verify that the sign-in succeeded.

        Raises:
            ExtractorError: if no username is configured, or if the page
                returned after posting the form does not match
                ``_SUCCESSFUL_LOGIN_REGEX``.
        """
        (username, password) = self._get_login_info()
        if username is None:
            raise ExtractorError(
                self._ACCOUNT_CREDENTIALS_HINT,
                expected=True)

        # Work on a copy: std_headers is a shared module-level dict, and
        # adding a Referer to it in place would leak into unrelated requests.
        headers = std_headers.copy()
        if 'Referer' not in headers:
            headers['Referer'] = self._LOGIN_URL

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            'Downloading login form')

        csrf = self._html_search_regex(
            r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
            login_page, 'csrf token')

        login_form = {
            'csrfmiddlewaretoken': csrf,
            'email': username,
            'password1': password,
            'login': 'Sign In',
            'next': '',
        }

        # urllib on Python 3 only accepts bytes as POST data; the urlencoded
        # form is percent-encoded ASCII, so encoding it is lossless.
        post_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
        request = compat_urllib_request.Request(
            self._LOGIN_URL, post_data, headers=headers)
        login_page = self._download_webpage(
            request, None, 'Logging in as %s' % username)

        # A successful login is detected by the presence of the sign-out link.
        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
            raise ExtractorError(
                'Login failed; make sure your credentials are correct and try again.',
                expected=True)

        self.to_screen('Login successful')
75 | ||
76 | ||
class SafariIE(SafariBaseIE):
    """Extractor for a single safaribooksonline.com video part page.

    The part page is fetched through the book API and the Brightcove
    player URL found in it is handed over to the Brightcove extractor.
    """

    IE_NAME = 'safari'
    IE_DESC = 'safaribooksonline.com online video'
    _VALID_URL = r'''(?x)https?://
                            (?:www\.)?safaribooksonline\.com/
                                (?:
                                    library/view/[^/]+|
                                    api/v1/book
                                )/
                                (?P<course_id>[^/]+)/
                                    (?:chapter(?:-content)?/)?
                                (?P<part>part\d+)\.html
    '''

    _TESTS = [{
        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
        'md5': '5b0c4cc1b3c1ba15dda7344085aa5592',
        'info_dict': {
            'id': '2842601850001',
            'ext': 'mp4',
            'title': 'Introduction',
        },
        'skip': 'Requires safaribooksonline account credentials',
    }, {
        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
        'only_matching': True,
    }, {
        # non-digits in course id
        'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a part URL to a Brightcove url_result.

        Raises:
            ExtractorError: if no Brightcove URL is found in the part page.
        """
        course_id, part = re.match(self._VALID_URL, url).group('course_id', 'part')

        # Always go through the API's chapter-content endpoint, whichever
        # of the accepted URL shapes was supplied.
        content_url = '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part)
        webpage = self._download_webpage(content_url, part)

        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
        if bc_url:
            # Pass the originating page along as Referer for the delegate.
            return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove')

        raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)
32d687f5 | 123 | |
124 | ||
class SafariCourseIE(SafariBaseIE):
    """Extractor turning a safaribooksonline.com course into a playlist.

    Each entry of the course JSON's ``chapters`` list is delegated to the
    'Safari' extractor.
    """

    IE_NAME = 'safari:course'
    IE_DESC = 'safaribooksonline.com online courses'

    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>[^/]+)/?(?:[#?]|$)'

    _TESTS = [{
        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
        'info_dict': {
            'id': '9780133392838',
            'title': 'Hadoop Fundamentals LiveLessons',
        },
        'playlist_count': 22,
        'skip': 'Requires safaribooksonline account credentials',
    }, {
        'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the course JSON and build a playlist from its chapters.

        Raises:
            ExtractorError: if the course JSON has no ``chapters`` key.
        """
        course_id = self._match_id(url)

        json_url = '%s/%s/?override_format=%s' % (
            self._API_BASE, course_id, self._API_FORMAT)
        course_json = self._download_json(
            json_url, course_id, 'Downloading course JSON')

        if 'chapters' not in course_json:
            raise ExtractorError(
                'No chapters found for course %s' % course_id, expected=True)

        # One playlist entry per chapter, each handled by the 'Safari' extractor.
        entries = []
        for chapter in course_json['chapters']:
            entries.append(self.url_result(chapter, 'Safari'))

        return self.playlist_result(entries, course_id, course_json['title'])