]>
Commit | Line | Data |
---|---|---|
483fc223 S |
1 | from __future__ import unicode_literals |
2 | ||
8c3e35dd | 3 | import collections |
483fc223 | 4 | import json |
8c3e35dd | 5 | import os |
38eb2968 | 6 | import random |
483fc223 S |
7 | |
8 | from .common import InfoExtractor | |
9 | from ..compat import ( | |
10 | compat_str, | |
483fc223 S |
11 | compat_urlparse, |
12 | ) | |
13 | from ..utils import ( | |
425f3fdf | 14 | dict_get, |
483fc223 | 15 | ExtractorError, |
8c3e35dd | 16 | float_or_none, |
483fc223 S |
17 | int_or_none, |
18 | parse_duration, | |
756926ff | 19 | qualities, |
8c3e35dd | 20 | srt_subtitles_timecode, |
6e6bc8da | 21 | urlencode_postdata, |
483fc223 S |
22 | ) |
23 | ||
24 | ||
class PluralsightBaseIE(InfoExtractor):
    """Common base for the Pluralsight extractors in this module."""

    # Root URL that the API endpoints used by the subclasses are built from.
    _API_BASE = 'https://app.pluralsight.com'
563772ed S |
27 | |
28 | ||
class PluralsightIE(PluralsightBaseIE):
    """Extractor for a single Pluralsight clip addressed by a player URL.

    The clip is identified by the ``author``, ``name`` (module name),
    ``clip`` (clip index) and ``course`` query parameters of the URL.
    """

    IE_NAME = 'pluralsight'
    _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:training/)?player\?'
    _LOGIN_URL = 'https://app.pluralsight.com/id/'

    _NETRC_MACHINE = 'pluralsight'

    _TESTS = [{
        'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas',
        'md5': '4d458cf5cf4c593788672419a8dd4cf8',
        'info_dict': {
            'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04',
            'ext': 'mp4',
            'title': 'Management of SQL Server - Demo Monitoring',
            'duration': 338,
        },
        'skip': 'Requires pluralsight account credentials',
    }, {
        'url': 'https://app.pluralsight.com/training/player?course=angularjs-get-started&author=scott-allen&name=angularjs-get-started-m1-introduction&clip=0&mode=live',
        'only_matching': True,
    }, {
        # available without pluralsight account
        'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started',
        'only_matching': True,
    }, {
        'url': 'https://app.pluralsight.com/player?course=ccna-intro-networking&author=ross-bagurdes&name=ccna-intro-networking-m06&clip=0',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Attempt to log in before any extraction takes place."""
        self._login()

    def _login(self):
        """Submit the login form using the configured credentials.

        Returns silently when no username is available. Raises
        ExtractorError when the response contains a field validation error
        or does not contain any of the markers expected after logging in.
        """
        (username, password) = self._get_login_info()
        if username is None:
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        # Start from the hidden inputs of the form and add the credentials.
        login_form = self._hidden_inputs(login_page)

        login_form.update({
            'Username': username,
            'Password': password,
        })

        post_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
            'post url', default=self._LOGIN_URL, group='url')

        # The form action may be relative; resolve it against the login URL.
        if not post_url.startswith('http'):
            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)

        response = self._download_webpage(
            post_url, None, 'Logging in as %s' % username,
            data=urlencode_postdata(login_form),
            headers={'Content-Type': 'application/x-www-form-urlencoded'})

        error = self._search_regex(
            r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>',
            response, 'error message', default=None)
        if error:
            raise ExtractorError('Unable to login: %s' % error, expected=True)

        # Neither marker present: the response is not treated as a
        # successful login.
        if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
            raise ExtractorError('Unable to log in')

    def _get_subtitles(self, author, clip_id, lang, name, duration, video_id):
        """Download the captions of a clip and return them as subtitles.

        Returns a dict mapping ``lang`` to two variants (the raw captions as
        JSON and an SRT conversion), or None when no captions were obtained
        (the download is non-fatal).
        """
        captions_post = {
            'a': author,
            'cn': clip_id,
            'lc': lang,
            'm': name,
        }
        captions = self._download_json(
            '%s/player/retrieve-captions' % self._API_BASE, video_id,
            'Downloading captions JSON', 'Unable to download captions JSON',
            fatal=False, data=json.dumps(captions_post).encode('utf-8'),
            headers={'Content-Type': 'application/json;charset=utf-8'})
        if captions:
            return {
                lang: [{
                    'ext': 'json',
                    'data': json.dumps(captions),
                }, {
                    'ext': 'srt',
                    'data': self._convert_subtitles(duration, captions),
                }]
            }

    @staticmethod
    def _convert_subtitles(duration, subs):
        """Convert a list of caption dicts into SRT formatted text.

        Each caption supplies a start offset and a text. A cue ends where
        the next caption starts; the last cue ends at ``duration``. Captions
        lacking a start, a text or a resolvable end are skipped.
        """
        TIME_OFFSET_KEYS = ('displayTimeOffset', 'DisplayTimeOffset')
        TEXT_KEYS = ('text', 'Text')

        def time_offset(caption):
            # An offset of 0 is a legitimate start time (typically the very
            # first cue). dict_get skips falsy values unless told otherwise,
            # which would make such a cue look like it has no offset at all.
            return float_or_none(dict_get(
                caption, TIME_OFFSET_KEYS, skip_false_values=False))

        last_index = len(subs) - 1
        cues = []
        for num, current in enumerate(subs):
            start = time_offset(current)
            text = dict_get(current, TEXT_KEYS)
            if start is None or text is None:
                continue
            end = duration if num == last_index else time_offset(subs[num + 1])
            if end is None:
                continue
            # The trailing os.linesep element yields the blank line that
            # separates consecutive cues.
            cues.append(os.linesep.join(
                (
                    '%d' % num,
                    '%s --> %s' % (
                        srt_subtitles_timecode(start),
                        srt_subtitles_timecode(end)),
                    text,
                    os.linesep,
                )))
        return ''.join(cues)

    def _real_extract(self, url):
        """Extract formats, subtitles and metadata for the clip at ``url``.

        Raises ExtractorError when a required query parameter is missing or
        when the requested clip cannot be found in the course payload.
        """
        # Parse once; the result is reused to build the payload URL below.
        parsed_url = compat_urlparse.urlparse(url)
        qs = compat_urlparse.parse_qs(parsed_url.query)

        author = qs.get('author', [None])[0]
        name = qs.get('name', [None])[0]
        clip_id = qs.get('clip', [None])[0]
        course_name = qs.get('course', [None])[0]

        if any(not f for f in (author, name, clip_id, course_name,)):
            raise ExtractorError('Invalid URL', expected=True)

        display_id = '%s-%s' % (name, clip_id)

        # Same query string as the player URL, pointed at the payload API.
        payload_url = compat_urlparse.urlunparse(parsed_url._replace(
            netloc='app.pluralsight.com', path='player/api/v1/payload'))

        course = self._download_json(
            payload_url, display_id, headers={'Referer': url})['payload']['course']

        collection = course['modules']

        module, clip = None, None

        # Locate the module by name, then the clip by its index. Both the
        # module name and the clip index are looked up under two alternative
        # keys.
        for module_ in collection:
            if name in (module_.get('moduleName'), module_.get('name')):
                module = module_
                for clip_ in module_.get('clips', []):
                    clip_index = clip_.get('clipIndex')
                    if clip_index is None:
                        clip_index = clip_.get('index')
                    if clip_index is None:
                        continue
                    if compat_str(clip_index) == clip_id:
                        clip = clip_
                        break

        if not clip:
            raise ExtractorError('Unable to resolve clip')

        title = '%s - %s' % (module['title'], clip['title'])

        QUALITIES = {
            'low': {'width': 640, 'height': 480},
            'medium': {'width': 848, 'height': 640},
            'high': {'width': 1024, 'height': 768},
            'high-widescreen': {'width': 1280, 'height': 720},
        }

        QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen',)
        quality_key = qualities(QUALITIES_PREFERENCE)

        AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities'])

        # Built per call, so appending to the lists below does not leak
        # state between extractions.
        ALLOWED_QUALITIES = (
            AllowedQuality('webm', ['high', ]),
            AllowedQuality('mp4', ['low', 'medium', 'high', ]),
        )

        # Some courses also offer widescreen resolution for high quality (see
        # https://github.com/rg3/youtube-dl/issues/7766)
        widescreen = course.get('supportsWideScreenVideoFormats') is True
        best_quality = 'high-widescreen' if widescreen else 'high'
        if widescreen:
            for allowed_quality in ALLOWED_QUALITIES:
                allowed_quality.qualities.append(best_quality)

        # In order to minimize the number of calls to ViewClip API and reduce
        # the probability of being throttled or banned by Pluralsight we will
        # request only a single format unless formats listing was explicitly
        # requested.
        if self._downloader.params.get('listformats', False):
            allowed_qualities = ALLOWED_QUALITIES
        else:
            def guess_allowed_qualities():
                # A requested format of the form "<ext>-<quality>" is honored
                # when it names an allowed combination; anything else falls
                # back to the best quality in the preferred container.
                req_format = self._downloader.params.get('format') or 'best'
                req_format_split = req_format.split('-', 1)
                if len(req_format_split) > 1:
                    req_ext, req_quality = req_format_split
                    for allowed_quality in ALLOWED_QUALITIES:
                        if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
                            return (AllowedQuality(req_ext, (req_quality, )), )
                req_ext = 'webm' if self._downloader.params.get('prefer_free_formats') else 'mp4'
                return (AllowedQuality(req_ext, (best_quality, )), )
            allowed_qualities = guess_allowed_qualities()

        formats = []
        for ext, qualities_ in allowed_qualities:
            for quality in qualities_:
                f = QUALITIES[quality].copy()
                clip_post = {
                    'author': author,
                    'includeCaptions': False,
                    'clipIndex': int(clip_id),
                    'courseName': course_name,
                    'locale': 'en',
                    'moduleName': name,
                    'mediaType': ext,
                    'quality': '%dx%d' % (f['width'], f['height']),
                }
                format_id = '%s-%s' % (ext, quality)
                viewclip = self._download_json(
                    '%s/video/clips/viewclip' % self._API_BASE, display_id,
                    'Downloading %s viewclip JSON' % format_id, fatal=False,
                    data=json.dumps(clip_post).encode('utf-8'),
                    headers={'Content-Type': 'application/json;charset=utf-8'})

                # Pluralsight tracks multiple sequential calls to ViewClip API and start
                # to return 429 HTTP errors after some time (see
                # https://github.com/rg3/youtube-dl/pull/6989). Moreover it may even lead
                # to account ban (see https://github.com/rg3/youtube-dl/issues/6842).
                # To somewhat reduce the probability of these consequences
                # we will sleep random amount of time before each call to ViewClip.
                self._sleep(
                    random.randint(2, 5), display_id,
                    '%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')

                if not viewclip:
                    continue

                clip_urls = viewclip.get('urls')
                if not isinstance(clip_urls, list):
                    continue

                # One format per returned URL, sharing the dimensions of the
                # requested quality.
                for clip_url_data in clip_urls:
                    clip_url = clip_url_data.get('url')
                    if not clip_url:
                        continue
                    cdn = clip_url_data.get('cdn')
                    clip_f = f.copy()
                    clip_f.update({
                        'url': clip_url,
                        'ext': ext,
                        'format_id': '%s-%s' % (format_id, cdn) if cdn else format_id,
                        'quality': quality_key(quality),
                        'source_preference': int_or_none(clip_url_data.get('rank')),
                    })
                    formats.append(clip_f)

        self._sort_formats(formats)

        duration = int_or_none(
            clip.get('duration')) or parse_duration(clip.get('formattedDuration'))

        # TODO: other languages?
        subtitles = self.extract_subtitles(
            author, clip_id, 'en', name, duration, display_id)

        return {
            'id': clip.get('clipName') or clip['name'],
            'title': title,
            'duration': duration,
            'creator': author,
            'formats': formats,
            'subtitles': subtitles,
        }
303 | ||
304 | ||
class PluralsightCourseIE(PluralsightBaseIE):
    """Extractor that turns a Pluralsight course page into a playlist."""

    IE_NAME = 'pluralsight:course'
    _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:library/)?courses/(?P<id>[^/]+)'
    _TESTS = [{
        # Free course from Pluralsight Starter Subscription for Microsoft TechNet
        # https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz
        'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas',
        'info_dict': {
            'id': 'hosting-sql-server-windows-azure-iaas',
            'title': 'Hosting SQL Server in Microsoft Azure IaaS Fundamentals',
            'description': 'md5:61b37e60f21c4b2f91dc621a977d0986',
        },
        'playlist_count': 31,
    }, {
        # available without pluralsight account
        'url': 'https://www.pluralsight.com/courses/angularjs-get-started',
        'only_matching': True,
    }, {
        'url': 'https://app.pluralsight.com/library/courses/understanding-microsoft-azure-amazon-aws/table-of-contents',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist with one entry per playable clip of the course.

        Clips without ``playerParameters`` are left out. Every entry is
        delegated to PluralsightIE and carries the chapter metadata of the
        module it belongs to.
        """
        course_id = self._match_id(url)

        # TODO: PSM cookie

        course_info = self._download_json(
            '%s/data/course/%s' % (self._API_BASE, course_id),
            course_id, 'Downloading course JSON')

        title = course_info['title']
        description = (
            course_info.get('description')
            or course_info.get('shortDescription'))

        modules = self._download_json(
            '%s/data/course/content/%s' % (self._API_BASE, course_id),
            course_id, 'Downloading course data JSON')

        # Chapter numbers are 1-based and follow the order of the modules.
        entries = [
            {
                '_type': 'url_transparent',
                'url': '%s/training/player?%s' % (
                    self._API_BASE, clip['playerParameters']),
                'ie_key': PluralsightIE.ie_key(),
                'chapter': module.get('title'),
                'chapter_number': chapter_number,
                'chapter_id': module.get('moduleRef'),
            }
            for chapter_number, module in enumerate(modules, 1)
            for clip in module.get('clips', [])
            if clip.get('playerParameters')
        ]

        return self.playlist_result(entries, course_id, title, description)