]>
Commit | Line | Data |
---|---|---|
aa7e974a RA |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
8 | ExtractorError, | |
9 | float_or_none, | |
10 | int_or_none, | |
11 | urlencode_postdata, | |
c9120294 | 12 | urljoin, |
aa7e974a RA |
13 | ) |
14 | ||
15 | ||
class LinkedInLearningBaseIE(InfoExtractor):
    """Shared login and API helpers for the LinkedIn Learning extractors."""

    _NETRC_MACHINE = 'linkedin'
    _LOGIN_URL = 'https://www.linkedin.com/uas/login?trk=learning'

    def _call_api(self, course_slug, fields, video_slug=None, resolution=None):
        """Query the ``detailedCourses`` endpoint and return its first element.

        ``course_slug`` and ``fields`` are always sent as the ``courseSlug``
        and ``fields`` query parameters.  When ``video_slug`` is truthy, the
        ``videoSlug`` and ``resolution`` parameters are sent as well; in that
        case ``resolution`` must be an integer, because it is also formatted
        with ``%d`` for the progress note.

        Raises ExtractorError if no ``JSESSIONID`` cookie is available for the
        API URL, since its value is sent as the ``Csrf-Token`` header.
        """
        query = {
            'courseSlug': course_slug,
            'fields': fields,
            'q': 'slugs',
        }
        sub = ''
        if video_slug:
            query.update({
                'videoSlug': video_slug,
                'resolution': '_%s' % resolution,
            })
            sub = ' %dp' % resolution
        api_url = 'https://www.linkedin.com/learning-api/detailedCourses'
        # _real_initialize() returns early when no credentials are configured,
        # so the session cookie may legitimately be absent here.  Report that
        # explicitly instead of letting a bare KeyError escape.
        try:
            csrf_token = self._get_cookies(api_url)['JSESSIONID'].value
        except KeyError:
            raise ExtractorError(
                'Unable to find the JSESSIONID cookie required by the '
                'LinkedIn Learning API; provide account credentials or '
                'the cookies of a logged-in session', expected=True)
        return self._download_json(
            api_url, video_slug, 'Downloading%s JSON metadata' % sub,
            headers={
                'Csrf-Token': csrf_token,
            }, query=query)['elements'][0]

    def _get_urn_id(self, video_data):
        """Return the second numeric component of a ``lyndaCourse`` URN.

        Returns None when ``video_data`` has no ``urn`` value or the URN does
        not match the expected ``urn:li:lyndaCourse:<digits>,<digits>`` shape.
        """
        urn = video_data.get('urn')
        if urn:
            mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn)
            if mobj:
                return mobj.group(1)

    def _get_video_id(self, video_data, course_slug, video_slug):
        """Return the URN-derived id, or ``<course_slug>/<video_slug>`` as a fallback."""
        return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug)

    def _real_initialize(self):
        """Log in with the configured credentials, if there are any.

        Does nothing when ``_get_login_info()`` yields no e-mail.  Raises
        ExtractorError with the message found in the login response when that
        response contains a ``<span class="error">`` element.
        """
        email, password = self._get_login_info()
        if email is None:
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')
        # The form action may be relative, so resolve it against the login
        # URL; fall back to the known submit endpoint if no form is found.
        action_url = urljoin(self._LOGIN_URL, self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, 'post url',
            default='https://www.linkedin.com/uas/login-submit', group='url'))
        data = self._hidden_inputs(login_page)
        data.update({
            'session_key': email,
            'session_password': password,
        })
        login_submit_page = self._download_webpage(
            action_url, None, 'Logging in',
            data=urlencode_postdata(data))
        error = self._search_regex(
            r'<span[^>]+class="error"[^>]*>\s*(.+?)\s*</span>',
            login_submit_page, 'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)
72 | ||
73 | ||
class LinkedInLearningIE(LinkedInLearningBaseIE):
    """Extractor for a single video inside a LinkedIn Learning course."""

    IE_NAME = 'linkedin:learning'
    _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P<course_slug>[^/]+)/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals/welcome?autoplay=true',
        'md5': 'a1d74422ff0d5e66a792deb996693167',
        'info_dict': {
            'id': '90426',
            'ext': 'mp4',
            'title': 'Welcome',
            'timestamp': 1430396150.82,
            'upload_date': '20150430',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video addressed by ``url``.

        The API is queried once per known resolution, and every response that
        carries a progressive URL contributes one format.  Title, audio,
        streaming URL and the remaining metadata are taken from the response
        to the last query.
        """
        course_slug, video_slug = re.match(self._VALID_URL, url).groups()

        video_data = None
        formats = []
        for width, height in ((640, 360), (960, 540), (1280, 720)):
            video_data = self._call_api(
                course_slug, 'selectedVideo', video_slug, height)['selectedVideo']

            video_url_data = video_data.get('url') or {}
            progressive_url = video_url_data.get('progressiveUrl')
            if progressive_url:
                formats.append({
                    'format_id': 'progressive-%dp' % height,
                    'url': progressive_url,
                    'height': height,
                    'width': width,
                    'source_preference': 1,
                })

        title = video_data['title']

        # ``or {}`` (rather than a .get() default) also covers an explicit
        # null value, mirroring how the 'url' field is read above.
        audio_url = (video_data.get('audio') or {}).get('progressiveUrl')
        if audio_url:
            formats.append({
                'abr': 64,
                'ext': 'm4a',
                'format_id': 'audio',
                'url': audio_url,
                'vcodec': 'none',
            })

        streaming_url = video_url_data.get('streamingUrl')
        if streaming_url:
            formats.extend(self._extract_m3u8_formats(
                streaming_url, video_slug, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False))

        self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))

        return {
            'id': self._get_video_id(video_data, course_slug, video_slug),
            'title': title,
            'formats': formats,
            'thumbnail': video_data.get('defaultThumbnail'),
            # NOTE(review): the 1000 presumably scales milliseconds to
            # seconds -- confirm against float_or_none's signature.
            'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
            'duration': int_or_none(video_data.get('durationInSeconds')),
        }
137 | ||
138 | ||
class LinkedInLearningCourseIE(LinkedInLearningBaseIE):
    """Extractor that turns a LinkedIn Learning course into a playlist."""

    IE_NAME = 'linkedin:learning:course'
    _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals',
        'info_dict': {
            'id': 'programming-foundations-fundamentals',
            'title': 'Programming Foundations: Fundamentals',
            'description': 'md5:76e580b017694eb89dc8e8923fff5c86',
        },
        'playlist_mincount': 61,
    }

    @classmethod
    def suitable(cls, url):
        """Reject URLs that the single-video extractor already accepts.

        The course pattern is a prefix of the video pattern, so video URLs
        would otherwise match this extractor as well.
        """
        return False if LinkedInLearningIE.suitable(url) else super(LinkedInLearningCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist with one ``url_transparent`` entry per course video.

        Videos without a ``slug`` are skipped because no video URL can be
        built for them.
        """
        course_slug = self._match_id(url)
        course_data = self._call_api(course_slug, 'chapters,description,title')

        entries = []
        # ``or []`` also covers an explicit null, which a .get() default
        # would pass through and make the loop fail on None.
        for chapter_number, chapter in enumerate(course_data.get('chapters') or [], 1):
            chapter_title = chapter.get('title')
            chapter_id = self._get_urn_id(chapter)
            for video in chapter.get('videos') or []:
                video_slug = video.get('slug')
                if not video_slug:
                    continue
                entries.append({
                    '_type': 'url_transparent',
                    'id': self._get_video_id(video, course_slug, video_slug),
                    'title': video.get('title'),
                    'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug),
                    'chapter': chapter_title,
                    'chapter_number': chapter_number,
                    'chapter_id': chapter_id,
                    'ie_key': LinkedInLearningIE.ie_key(),
                })

        return self.playlist_result(
            entries, course_slug,
            course_data.get('title'),
            course_data.get('description'))