]>
Commit | Line | Data |
---|---|---|
483fc223 S |
1 | from __future__ import unicode_literals |
2 | ||
756926ff | 3 | import re |
483fc223 | 4 | import json |
38eb2968 | 5 | import random |
4c57b485 | 6 | import collections |
483fc223 S |
7 | |
8 | from .common import InfoExtractor | |
9 | from ..compat import ( | |
10 | compat_str, | |
11 | compat_urllib_parse, | |
483fc223 S |
12 | compat_urlparse, |
13 | ) | |
14 | from ..utils import ( | |
15 | ExtractorError, | |
16 | int_or_none, | |
17 | parse_duration, | |
756926ff | 18 | qualities, |
5c2266df | 19 | sanitized_Request, |
483fc223 S |
20 | ) |
21 | ||
22 | ||
563772ed S |
class PluralsightBaseIE(InfoExtractor):
    """Shared base for the Pluralsight extractors."""

    # Root URL that the extractors prefix to player and data API paths.
    # Served over HTTPS so that requests made after logging in (the login
    # form lives on https://app.pluralsight.com) are not sent in cleartext.
    _API_BASE = 'https://app.pluralsight.com'
25 | ||
26 | ||
class PluralsightIE(PluralsightBaseIE):
    """Extractor for a single clip opened in the Pluralsight training player.

    The clip is addressed entirely through the player URL query string
    (``author``, ``name``, ``clip`` and ``course``). Credentials are optional:
    when none are configured the login step is skipped.
    """

    IE_NAME = 'pluralsight'
    _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/training/player\?'
    _LOGIN_URL = 'https://app.pluralsight.com/id/'

    _NETRC_MACHINE = 'pluralsight'

    _TESTS = [{
        'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas',
        'md5': '4d458cf5cf4c593788672419a8dd4cf8',
        'info_dict': {
            'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04',
            'ext': 'mp4',
            'title': 'Management of SQL Server - Demo Monitoring',
            'duration': 338,
        },
        'skip': 'Requires pluralsight account credentials',
    }, {
        'url': 'https://app.pluralsight.com/training/player?course=angularjs-get-started&author=scott-allen&name=angularjs-get-started-m1-introduction&clip=0&mode=live',
        'only_matching': True,
    }, {
        # available without pluralsight account
        'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started',
        'only_matching': True,
    }]

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Log in with the configured credentials, if any.

        Does nothing when no username is configured.

        Raises:
            ExtractorError: when the site reports a validation error
                (expected), or when the response contains neither of the
                markers this extractor treats as proof of a logged-in page.
        """
        (username, password) = self._get_login_info()
        if username is None:
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        # Start from the hidden inputs of the login page so that any hidden
        # fields are posted back together with the credentials.
        login_form = self._hidden_inputs(login_page)
        login_form.update({
            'Username': username.encode('utf-8'),
            'Password': password.encode('utf-8'),
        })

        post_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
            'post url', default=self._LOGIN_URL, group='url')

        # The form action may be relative; resolve it against the login URL.
        if not post_url.startswith('http'):
            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)

        request = sanitized_Request(
            post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        response = self._download_webpage(
            request, None, 'Logging in as %s' % username)

        error = self._search_regex(
            r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>',
            response, 'error message', default=None)
        if error:
            raise ExtractorError('Unable to login: %s' % error, expected=True)

        # Neither marker present: treat the response as a failed login.
        if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
            raise ExtractorError('Unable to log in')

    @staticmethod
    def _find_module_and_clip(collection, name, clip_id):
        """Return the first ``(module, clip)`` pair matching the request.

        A module matches when ``name`` equals its ``moduleName`` or ``name``
        value; a clip matches when its ``clipIndex`` (or, failing that,
        ``index``) stringifies to ``clip_id``. The search stops at the first
        hit so the returned clip always belongs to the returned module.

        Returns ``(None, None)`` when nothing matches.
        """
        for module in collection:
            if name not in (module.get('moduleName'), module.get('name')):
                continue
            # ``or []`` also covers an explicit null ``clips`` value.
            for clip in module.get('clips') or []:
                clip_index = clip.get('clipIndex')
                if clip_index is None:
                    clip_index = clip.get('index')
                if clip_index is None:
                    continue
                if compat_str(clip_index) == clip_id:
                    return module, clip
        return None, None

    def _real_extract(self, url):
        """Resolve the clip named by the player URL and collect its formats.

        Raises:
            ExtractorError: when a required query parameter is missing
                (expected) or the clip cannot be found in the course data.
        """
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)

        author = qs.get('author', [None])[0]
        name = qs.get('name', [None])[0]
        clip_id = qs.get('clip', [None])[0]
        course = qs.get('course', [None])[0]

        if not all((author, name, clip_id, course)):
            raise ExtractorError('Invalid URL', expected=True)

        display_id = '%s-%s' % (name, clip_id)

        webpage = self._download_webpage(url, display_id)

        modules = self._search_regex(
            r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
            webpage, 'modules', default=None)

        if modules:
            collection = self._parse_json(modules, display_id)
        else:
            # Webpage may be served in different layout (see
            # https://github.com/rg3/youtube-dl/issues/7607)
            collection = self._parse_json(
                self._search_regex(
                    r'var\s+initialState\s*=\s*({.+?});\n', webpage, 'initial state'),
                display_id)['course']['modules']

        module, clip = self._find_module_and_clip(collection, name, clip_id)
        if not clip:
            raise ExtractorError('Unable to resolve clip')

        QUALITIES = {
            'low': {'width': 640, 'height': 480},
            'medium': {'width': 848, 'height': 640},
            'high': {'width': 1024, 'height': 768},
            'high-widescreen': {'width': 1280, 'height': 720},
        }

        QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen',)
        quality_key = qualities(QUALITIES_PREFERENCE)

        AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities'])

        # Some courses also offer widescreen resolution for high quality (see
        # https://github.com/rg3/youtube-dl/issues/7766)
        widescreen = bool(re.search(
            r'courseSupportsWidescreenVideoFormats\s*:\s*true', webpage))
        best_quality = 'high-widescreen' if widescreen else 'high'
        widescreen_extra = [best_quality] if widescreen else []

        ALLOWED_QUALITIES = (
            AllowedQuality('webm', ['high'] + widescreen_extra),
            AllowedQuality('mp4', ['low', 'medium', 'high'] + widescreen_extra),
        )

        # In order to minimize the number of calls to ViewClip API and reduce
        # the probability of being throttled or banned by Pluralsight we
        # request only a single format unless a formats listing was
        # explicitly requested.
        if self._downloader.params.get('listformats', False):
            allowed_qualities = ALLOWED_QUALITIES
        else:
            def guess_allowed_qualities():
                # Honour an explicit "<ext>-<quality>" format request when it
                # names an allowed combination; otherwise fall back to the
                # best quality in the preferred container.
                req_format = self._downloader.params.get('format') or 'best'
                req_format_split = req_format.split('-', 1)
                if len(req_format_split) > 1:
                    req_ext, req_quality = req_format_split
                    for allowed_quality in ALLOWED_QUALITIES:
                        if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
                            return (AllowedQuality(req_ext, (req_quality, )), )
                req_ext = 'webm' if self._downloader.params.get('prefer_free_formats') else 'mp4'
                return (AllowedQuality(req_ext, (best_quality, )), )
            allowed_qualities = guess_allowed_qualities()

        formats = []
        for ext, qualities_ in allowed_qualities:
            for quality in qualities_:
                f = QUALITIES[quality].copy()
                clip_post = {
                    'a': author,
                    'cap': 'false',
                    'cn': clip_id,
                    'course': course,
                    'lc': 'en',
                    'm': name,
                    'mt': ext,
                    'q': '%dx%d' % (f['width'], f['height']),
                }
                request = sanitized_Request(
                    '%s/training/Player/ViewClip' % self._API_BASE,
                    json.dumps(clip_post).encode('utf-8'))
                request.add_header('Content-Type', 'application/json;charset=utf-8')
                format_id = '%s-%s' % (ext, quality)
                clip_url = self._download_webpage(
                    request, display_id, 'Downloading %s URL' % format_id, fatal=False)

                # Pluralsight tracks multiple sequential calls to the ViewClip
                # API and starts to return 429 HTTP errors after some time (see
                # https://github.com/rg3/youtube-dl/pull/6989). Moreover it may
                # even lead to an account ban (see
                # https://github.com/rg3/youtube-dl/issues/6842).
                # To somewhat reduce the probability of these consequences
                # we sleep a random amount of time after each call to
                # ViewClip, which also spaces out calls made for successive
                # clips.
                self._sleep(
                    random.randint(2, 5), display_id,
                    '%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')

                # The download is non-fatal; skip formats that failed.
                if not clip_url:
                    continue
                f.update({
                    'url': clip_url,
                    'ext': ext,
                    'format_id': format_id,
                    'quality': quality_key(quality),
                })
                formats.append(f)
        self._sort_formats(formats)

        # TODO: captions
        # http://www.pluralsight.com/training/Player/ViewClip + cap = true
        # or
        # http://www.pluralsight.com/training/Player/Captions
        # { a = author, cn = clip_id, lc = en, m = name }

        # NOTE(review): modules and clip indexes are looked up under two
        # alternative key names, but the id below requires 'clipName' --
        # confirm the alternative page layout still provides that key.
        return {
            'id': clip['clipName'],
            'title': '%s - %s' % (module['title'], clip['title']),
            'duration': int_or_none(clip.get('duration')) or parse_duration(clip.get('formattedDuration')),
            'creator': author,
            'formats': formats
        }
241 | ||
242 | ||
class PluralsightCourseIE(PluralsightBaseIE):
    """Playlist extractor for a whole Pluralsight course.

    Every clip of the course that exposes player parameters becomes one
    playlist entry delegated to the 'Pluralsight' extractor.
    """

    IE_NAME = 'pluralsight:course'
    _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:library/)?courses/(?P<id>[^/]+)'
    _TESTS = [{
        # Free course from Pluralsight Starter Subscription for Microsoft TechNet
        # https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz
        'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas',
        'info_dict': {
            'id': 'hosting-sql-server-windows-azure-iaas',
            'title': 'Hosting SQL Server in Microsoft Azure IaaS Fundamentals',
            'description': 'md5:61b37e60f21c4b2f91dc621a977d0986',
        },
        'playlist_count': 31,
    }, {
        # available without pluralsight account
        'url': 'https://www.pluralsight.com/courses/angularjs-get-started',
        'only_matching': True,
    }, {
        'url': 'https://app.pluralsight.com/library/courses/understanding-microsoft-azure-amazon-aws/table-of-contents',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist with one player URL per clip of the course."""
        course_id = self._match_id(url)

        # TODO: PSM cookie

        # Course metadata: the title is mandatory, the description optional.
        course_url = '%s/data/course/%s' % (self._API_BASE, course_id)
        course = self._download_json(
            course_url, course_id, 'Downloading course JSON')

        title = course['title']
        description = course.get('description') or course.get('shortDescription')

        # Course content: a list of modules, each carrying its clips.
        content_url = '%s/data/course/content/%s' % (self._API_BASE, course_id)
        course_data = self._download_json(
            content_url, course_id, 'Downloading course data JSON')

        # Clips without player parameters cannot be addressed in the player
        # and are therefore left out of the playlist.
        entries = [
            self.url_result(
                '%s/training/player?%s' % (self._API_BASE, clip.get('playerParameters')),
                'Pluralsight')
            for module in course_data
            for clip in module.get('clips', [])
            if clip.get('playerParameters')
        ]

        return self.playlist_result(entries, course_id, title, description)