]>
Commit | Line | Data |
---|---|---|
9200bc70 | 1 | import re |
2 | ||
177662e0 | 3 | from .common import InfoExtractor |
9200bc70 | 4 | from ..utils import ( |
5 | int_or_none, | |
6 | parse_iso8601, | |
7 | traverse_obj, | |
8 | unified_timestamp, | |
9 | url_basename, | |
10 | url_or_none, | |
11 | ) | |
177662e0 | 12 | |
13 | ||
class MicrosoftEmbedIE(InfoExtractor):
    """Extractor for microsoft.com ``videoplayer/embed/<id>`` pages.

    Metadata is read from the JSON document served at ``_API_URL + <id>``.
    """

    _VALID_URL = r'https?://(?:www\.)?microsoft\.com/(?:[^/]+/)?videoplayer/embed/(?P<id>[a-z0-9A-Z]+)'

    _TESTS = [{
        'url': 'https://www.microsoft.com/en-us/videoplayer/embed/RWL07e',
        'md5': 'eb0ae9007f9b305f9acd0a03e74cb1a9',
        'info_dict': {
            'id': 'RWL07e',
            'title': 'Microsoft for Public Health and Social Services',
            'ext': 'mp4',
            'thumbnail': 'http://img-prod-cms-rt-microsoft-com.akamaized.net/cms/api/am/imageFileData/RWL7Ju?ver=cae5',
            'age_limit': 0,
            'timestamp': 1631658316,
            'upload_date': '20210914',
        },
    }]
    _API_URL = 'https://prod-video-cms-rt-microsoft-com.akamaized.net/vhs/api/videos/'

    def _real_extract(self, url):
        """Build the info dict for a single embedded video.

        Formats come from ``metadata['streams']``, subtitles from the
        optional ``captions`` mapping, and title/timestamp/age limit and
        thumbnails from ``snippet``.
        """
        video_id = self._match_id(url)
        metadata = self._download_json(self._API_URL + video_id, video_id)

        formats = []
        for stream_kind, stream in metadata['streams'].items():
            # Manifest-based streams expand into several formats; any other
            # stream kind is treated as a single direct URL.
            if stream_kind == 'smooth_Streaming':
                found = self._extract_ism_formats(stream['url'], video_id, 'mss')
            elif stream_kind == 'apple_HTTP_Live_Streaming':
                found = self._extract_m3u8_formats(stream['url'], video_id, 'mp4')
            elif stream_kind == 'mPEG_DASH':
                found = self._extract_mpd_formats(stream['url'], video_id)
            else:
                found = [{
                    'format_id': stream_kind,
                    'url': stream['url'],
                    'height': stream.get('heightPixels'),
                    'width': stream.get('widthPixels'),
                }]
            formats.extend(found)

        # One VTT track per language key of the captions mapping.
        subtitles = {}
        for lang, caption in traverse_obj(metadata, 'captions', default={}).items():
            subtitles[lang] = [{
                'url': caption.get('url'),
                'ext': 'vtt',
            }]

        thumbnails = []
        for thumb in traverse_obj(metadata, ('snippet', 'thumbnails', ...)):
            thumbnails.append({
                'url': thumb.get('url'),
                # Falsy dimensions (e.g. 0) are normalised to None.
                'width': thumb.get('width') or None,
                'height': thumb.get('height') or None,
            })
        self._remove_duplicate_formats(thumbnails)

        title = traverse_obj(metadata, ('snippet', 'title'))
        timestamp = unified_timestamp(traverse_obj(metadata, ('snippet', 'activeStartDate')))
        age_limit = int_or_none(traverse_obj(metadata, ('snippet', 'minimumAge'))) or 0

        return {
            'id': video_id,
            'title': title,
            'timestamp': timestamp,
            'age_limit': age_limit,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
        }
9200bc70 | 75 | |
76 | ||
class MicrosoftMediusBaseIE(InfoExtractor):
    """Base class providing subtitle grouping and ISM format helpers."""

    @staticmethod
    def _sub_to_dict(subtitle_list):
        """Group a flat list of subtitle dicts by their ``'tag'`` key.

        Each input dict may carry a ``'tag'`` entry naming its language;
        entries without one are grouped under ``'und'``. The ``'tag'`` key is
        not included in the returned entries.

        The input dicts are copied before the tag is removed, so the caller's
        list and its elements are left untouched.

        Returns a dict mapping tag -> list of subtitle dicts.
        """
        subtitles = {}
        for sub in subtitle_list or ():
            entry = dict(sub)  # copy: do not pop 'tag' out of the caller's dict
            subtitles.setdefault(entry.pop('tag', 'und'), []).append(entry)
        return subtitles

    def _extract_ism(self, ism_url, video_id):
        """Extract ISM formats and de-prioritise non-English ones.

        A format gets ``language_preference = -10`` when its language is not
        ``'eng'`` and its format id does not contain ``'English'``.
        """
        formats = self._extract_ism_formats(ism_url, video_id)
        for fmt in formats:
            # .get() keeps a format lacking either key from raising KeyError;
            # such a format is simply treated as non-English.
            if fmt.get('language') != 'eng' and 'English' not in (fmt.get('format_id') or ''):
                fmt['language_preference'] = -10
        return formats
91 | ||
92 | ||
class MicrosoftMediusIE(MicrosoftMediusBaseIE):
    """Extractor for medius.microsoft.com ``Embed`` video pages."""

    _VALID_URL = r'https?://medius\.microsoft\.com/Embed/(?:Video\?id=|video-nc/|VideoDetails/)(?P<id>[\da-f-]+)'

    _TESTS = [{
        'url': 'https://medius.microsoft.com/Embed/video-nc/9640d86c-f513-4889-959e-5dace86e7d2b',
        'info_dict': {
            'id': '9640d86c-f513-4889-959e-5dace86e7d2b',
            'ext': 'ismv',
            'title': 'Rapidly code, test and ship from secure cloud developer environments',
            'description': 'md5:33c8e4facadc438613476eea24165f71',
            'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
            'subtitles': 'count:30',
        },
    }, {
        'url': 'https://medius.microsoft.com/Embed/video-nc/81215af5-c813-4dcd-aede-94f4e1a7daa3',
        'info_dict': {
            'id': '81215af5-c813-4dcd-aede-94f4e1a7daa3',
            'ext': 'ismv',
            'title': 'Microsoft Build opening',
            'description': 'md5:43455096141077a1f23144cab8cec1cb',
            'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
            'subtitles': 'count:31',
        },
    }, {
        'url': 'https://medius.microsoft.com/Embed/VideoDetails/78493569-9b3b-4a85-a409-ee76e789e25c',
        'info_dict': {
            'id': '78493569-9b3b-4a85-a409-ee76e789e25c',
            'ext': 'ismv',
            'title': ' Anomaly Detection & Root cause at Edge',
            'description': 'md5:f8f1ad93d7918649bfb97fa081b03b83',
            'thumbnail': r're:https://mediusdownload.event.microsoft.com/asset.*\.jpg.*',
            'subtitles': 'count:17',
        },
    }, {
        'url': 'https://medius.microsoft.com/Embed/Video?id=0dc69bda-079b-4070-a7db-a8da1a06a9c7',
        'only_matching': True,
    }, {
        'url': 'https://medius.microsoft.com/Embed/video-nc/fe823a91-959c-465b-96d4-8f4db624f72c',
        'only_matching': True,
    }]

    def _extract_subtitle(self, webpage, video_id):
        """Collect subtitle tracks from the embed page.

        The ``captionsConfiguration`` JSON object is tried first. If it
        yields no usable entries, ``.vtt`` URLs found in ``var file = {...}``
        snippets are used instead, with the language tag taken from the last
        ``_``-separated part of the file name.
        """
        captions_config = self._search_json(
            r'const\s+captionsConfiguration\s*=', webpage, 'captions', video_id, default=None)
        captions = traverse_obj(captions_config, (
            'languageList', lambda _, v: url_or_none(v['src']), {
                'url': 'src',
                'tag': ('srclang', {str}),
                'name': ('kind', {str}),
            }))

        if not captions:
            vtt_urls = re.findall(r'var\s+file\s+=\s+\{[^}]+\'(https://[^\']+\.vtt\?[^\']+)', webpage)
            captions = [{
                'url': vtt_url,
                'tag': url_basename(vtt_url).split('.vtt')[0].split('_')[-1],
            } for vtt_url in vtt_urls]

        return self._sub_to_dict(captions)

    def _real_extract(self, url):
        """Download the ``video-nc`` embed page for the id and build the info dict."""
        video_id = self._match_id(url)
        # Whatever URL form matched, the page is always fetched via video-nc/.
        webpage = self._download_webpage(f'https://medius.microsoft.com/Embed/video-nc/{video_id}', video_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        ism_url = self._search_regex(r'StreamUrl\s*=\s*"([^"]+manifest)"', webpage, 'ism url')
        formats = self._extract_ism(ism_url, video_id)
        thumbnail = self._og_search_thumbnail(webpage)
        subtitles = self._extract_subtitle(webpage, video_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'thumbnail': thumbnail,
            'subtitles': subtitles,
        }
159 | ||
160 | ||
class MicrosoftLearnPlaylistIE(InfoExtractor):
    """Playlist extractor for learn.microsoft.com ``shows/<id>`` and ``events/<id>`` pages."""

    _VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?(?P<type>shows|events)/(?P<id>[\w-]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://learn.microsoft.com/en-us/shows/bash-for-beginners',
        'info_dict': {
            'id': 'bash-for-beginners',
            'title': 'Bash for Beginners',
            'description': 'md5:16a91c07222117d1e00912f0dbc02c2c',
        },
        'playlist_count': 20,
    }, {
        'url': 'https://learn.microsoft.com/en-us/events/build-2022',
        'info_dict': {
            'id': 'build-2022',
            'title': 'Microsoft Build 2022 - Events',
            'description': 'md5:c16b43848027df837b22c6fbac7648d3',
        },
        'playlist_count': 201,
    }]

    def _entries(self, url_base, video_id):
        """Yield a url_result for every entry listed by the paged API at ``url_base``.

        Pages are requested with an increasing ``$skip`` offset. Iteration
        stops once the offset reaches the reported ``count`` or a page
        contributes no URLs.
        """
        offset = 0
        while True:
            page = self._download_json(url_base, video_id, f'Downloading entries {offset}', query={
                'locale': 'en-us',
                '$skip': offset,
            })
            paths = traverse_obj(page, ('results', ..., 'url', {str}))
            yield from (
                self.url_result(f'https://learn.microsoft.com/en-us{path}')
                for path in paths)
            offset += len(paths)
            if offset >= page.get('count', 0) or not paths:
                break

    def _real_extract(self, url):
        """Return a playlist whose entries are the show's episodes or the event's sessions."""
        mobj = self._match_valid_url(url)
        playlist_id = mobj.group('id')
        playlist_type = mobj.group('type')
        webpage = self._download_webpage(url, playlist_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)

        # The API path segment depends on the kind of playlist.
        if playlist_type == 'shows':
            sub_type = 'episodes'
        else:
            sub_type = 'sessions'

        url_base = f'https://learn.microsoft.com/api/contentbrowser/search/{playlist_type}/{playlist_id}/{sub_type}'
        return self.playlist_result(
            self._entries(url_base, playlist_id), playlist_id,
            title=title, description=description)
207 | ||
208 | ||
class MicrosoftLearnEpisodeIE(MicrosoftMediusBaseIE):
    """Extractor for a single episode page under learn.microsoft.com ``shows/``."""

    _VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?shows/[\w-]+/(?P<id>[^?#/]+)'
    _TESTS = [{
        'url': 'https://learn.microsoft.com/en-us/shows/bash-for-beginners/what-is-the-difference-between-a-terminal-and-a-shell-2-of-20-bash-for-beginners/',
        'info_dict': {
            'id': 'd44e1a03-a0e5-45c2-9496-5c9fa08dc94c',
            'ext': 'ismv',
            'title': 'What is the Difference Between a Terminal and a Shell? (Part 2 of 20)',
            'description': 'md5:7bbbfb593d21c2cf2babc3715ade6b88',
            'timestamp': 1676339547,
            'upload_date': '20230214',
            'thumbnail': r're:https://learn\.microsoft\.com/video/media/.*\.png',
            'subtitles': 'count:14',
        },
    }]

    def _real_extract(self, url):
        """Resolve the page's ``entryId`` meta tag and build the info dict from the video API.

        The returned ``id`` is the entry id, not the URL slug. The slug is
        only passed as the id argument to the download and format helpers.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        entry_id = self._html_search_meta('entryId', webpage, 'entryId', fatal=True)
        video_info = self._download_json(
            f'https://learn.microsoft.com/api/video/public/v1/entries/{entry_id}', video_id)

        formats = self._extract_ism(video_info['publicVideo']['adaptiveVideoUrl'], video_id)
        # Keep only captions that carry a usable URL.
        caption_list = traverse_obj(video_info, (
            'publicVideo', 'captions', lambda _, v: url_or_none(v['url']), {
                'tag': ('language', {str}),
                'url': 'url',
            }))

        info = {
            'id': entry_id,
            'formats': formats,
            'subtitles': self._sub_to_dict(caption_list),
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
        }
        info.update(traverse_obj(video_info, {
            'timestamp': ('createTime', {parse_iso8601}),
            'thumbnails': ('publicVideo', 'thumbnailOtherSizes', ..., {'url': {url_or_none}}),
        }))
        return info
247 | ||
248 | ||
class MicrosoftLearnSessionIE(InfoExtractor):
    """Extractor for a single session page under learn.microsoft.com ``events/``."""

    _VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?events/[\w-]+/(?P<id>[^?#/]+)'
    _TESTS = [{
        'url': 'https://learn.microsoft.com/en-us/events/build-2022/ts01-rapidly-code-test-ship-from-secure-cloud-developer-environments',
        'info_dict': {
            'id': '9640d86c-f513-4889-959e-5dace86e7d2b',
            'ext': 'ismv',
            'title': 'Rapidly code, test and ship from secure cloud developer environments - Events',
            'description': 'md5:f26c1a85d41c1cffd27a0279254a25c3',
            'timestamp': 1653408600,
            'upload_date': '20220524',
            'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
        },
    }]

    def _real_extract(self, url):
        """Delegate to MicrosoftMediusIE via the page's ``externalVideoUrl`` meta tag.

        Title, description and timestamp read from this page are passed along
        with the url_transparent result.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        start_date = self._html_search_meta('startDate', webpage, 'startDate')
        timestamp = parse_iso8601(start_date)

        # Extraction fails here if the page has no externalVideoUrl meta tag.
        external_url = self._html_search_meta('externalVideoUrl', webpage, 'videoUrl', fatal=True)
        return self.url_result(
            external_url, url_transparent=True, ie=MicrosoftMediusIE,
            title=title, description=description, timestamp=timestamp)
277 | ||
278 | ||
class MicrosoftBuildIE(InfoExtractor):
    """Extractor for build.microsoft.com session pages and the full session list."""

    _VALID_URL = [
        r'https?://build\.microsoft\.com/[\w-]+/sessions/(?P<id>[\da-f-]+)',
        r'https?://build\.microsoft\.com/[\w-]+/(?P<id>sessions)/?(?:[?#]|$)',
    ]

    _TESTS = [{
        'url': 'https://build.microsoft.com/en-US/sessions/b49feb31-afcd-4217-a538-d3ca1d171198?source=sessions',
        'info_dict': {
            'id': 'aee55fb5-fcf9-4b38-b764-a3527cb57554',
            'ext': 'ismv',
            'title': 'Microsoft Build opening keynote',
            'description': 'md5:d38338f336ef4b6ef9ad2a7466a76655',
            'timestamp': 1716307200,
            'upload_date': '20240521',
            'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
        },
    }, {
        'url': 'https://build.microsoft.com/en-US/sessions',
        'info_dict': {
            'id': 'sessions',
        },
        'playlist_mincount': 418,
    }]

    def _real_extract(self, url):
        """Return one session, or a playlist of all sessions for the ``sessions`` URL.

        The full session list is downloaded in both cases. Each session whose
        ``onDemand`` field is a usable URL becomes a url_transparent result
        handled by MicrosoftMediusIE. For a single-session URL, the first
        entry whose id equals the matched id is returned, or None when no
        entry matches.
        """
        video_id = self._match_id(url)

        sessions = self._download_json(
            'https://api-v2.build.microsoft.com/api/session/all/en-US', video_id, 'Downloading video info')

        # Skip sessions without a valid on-demand URL: indexing 'onDemand'
        # unconditionally would let one such record raise KeyError and abort
        # extraction of every other session.
        playable_sessions = traverse_obj(sessions, lambda _, v: url_or_none(v['onDemand']))

        entries = [
            self.url_result(
                session['onDemand'], ie=MicrosoftMediusIE, url_transparent=True, **traverse_obj(session, {
                    'id': ('sessionId', {str}),
                    'title': ('title', {str}),
                    'description': ('description', {str}),
                    'timestamp': ('startDateTime', {parse_iso8601}),
                }))
            for session in playable_sessions
        ]

        if video_id == 'sessions':
            return self.playlist_result(entries, video_id)
        return traverse_obj(entries, (lambda _, v: v['id'] == video_id), get_all=False)