]>
Commit | Line | Data |
---|---|---|
1 | from .common import InfoExtractor | |
2 | from ..utils import int_or_none, traverse_obj, unified_timestamp | |
3 | ||
4 | ||
class MicrosoftEmbedIE(InfoExtractor):
    """Extractor for videos behind microsoft.com ``videoplayer/embed`` URLs.

    The video ID taken from the URL is appended to ``_API_URL`` and the JSON
    document returned from there supplies the streams, captions, thumbnails
    and descriptive metadata.
    """

    _VALID_URL = r'https?://(?:www\.)?microsoft\.com/(?:[^/]+/)?videoplayer/embed/(?P<id>[a-z0-9A-Z]+)'

    _TESTS = [{
        'url': 'https://www.microsoft.com/en-us/videoplayer/embed/RWL07e',
        'md5': 'eb0ae9007f9b305f9acd0a03e74cb1a9',
        'info_dict': {
            'id': 'RWL07e',
            'title': 'Microsoft for Public Health and Social Services',
            'ext': 'mp4',
            'thumbnail': 'http://img-prod-cms-rt-microsoft-com.akamaized.net/cms/api/am/imageFileData/RWL7Ju?ver=cae5',
            'age_limit': 0,
            'timestamp': 1631658316,
            'upload_date': '20210914',
        },
    }]
    _API_URL = 'https://prod-video-cms-rt-microsoft-com.akamaized.net/vhs/api/videos/'

    def _real_extract(self, url):
        """Download the video metadata JSON and build the info dict.

        Stream, caption and thumbnail entries that are malformed or carry no
        URL are skipped, and manifest downloads are non-fatal, so a single
        bad entry does not abort extraction of the remaining ones.
        """
        video_id = self._match_id(url)
        metadata = self._download_json(self._API_URL + video_id, video_id)

        streams = traverse_obj(metadata, 'streams')
        if not isinstance(streams, dict):
            streams = {}

        formats = []
        for source_type, source in streams.items():
            stream_url = source.get('url') if isinstance(source, dict) else None
            if not stream_url:
                # Nothing downloadable in this entry; try the other streams.
                continue
            # Manifest-based streams are expanded by the matching helper.
            # fatal=False keeps one unreachable manifest from discarding the
            # formats gathered from the remaining sources.
            if source_type == 'smooth_Streaming':
                formats.extend(self._extract_ism_formats(
                    stream_url, video_id, 'mss', fatal=False))
            elif source_type == 'apple_HTTP_Live_Streaming':
                formats.extend(self._extract_m3u8_formats(
                    stream_url, video_id, 'mp4', fatal=False))
            elif source_type == 'mPEG_DASH':
                formats.extend(self._extract_mpd_formats(
                    stream_url, video_id, fatal=False))
            else:
                # Any other stream type is used as a direct URL, with the
                # stream type name serving as the format id.
                formats.append({
                    'format_id': source_type,
                    'url': stream_url,
                    'height': int_or_none(source.get('heightPixels')),
                    'width': int_or_none(source.get('widthPixels')),
                })

        captions = traverse_obj(metadata, 'captions')
        if not isinstance(captions, dict):
            captions = {}

        subtitles = {}
        for lang, data in captions.items():
            caption_url = data.get('url') if isinstance(data, dict) else None
            if caption_url:
                subtitles[lang] = [{
                    'url': caption_url,
                    'ext': 'vtt',
                }]

        thumbnails = [{
            'url': thumb['url'],
            # `or None` turns a zero dimension into "unknown".
            'width': int_or_none(thumb.get('width')) or None,
            'height': int_or_none(thumb.get('height')) or None,
        } for thumb in traverse_obj(metadata, ('snippet', 'thumbnails', ...)) or []
            if isinstance(thumb, dict) and thumb.get('url')]
        # Reuses the format de-duplication helper to drop repeated thumbnails.
        self._remove_duplicate_formats(thumbnails)

        return {
            'id': video_id,
            'title': traverse_obj(metadata, ('snippet', 'title')),
            'timestamp': unified_timestamp(traverse_obj(metadata, ('snippet', 'activeStartDate'))),
            'age_limit': int_or_none(traverse_obj(metadata, ('snippet', 'minimumAge'))) or 0,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
        }