]>
Commit | Line | Data |
---|---|---|
1 | from base64 import b64decode | |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | merge_dicts, | |
6 | parse_iso8601, | |
7 | parse_duration, | |
8 | parse_resolution, | |
9 | try_get, | |
10 | url_basename, | |
11 | ) | |
12 | ||
13 | ||
class MicrosoftStreamIE(InfoExtractor):
    """Extractor for Microsoft Stream videos.

    Matches web/www/msit.microsoftstream.com video URLs (UUID video ids).
    Requires an authenticated session via cookies: the logged-in page embeds
    an ``AccessToken`` and ``ApiGatewayUri`` which are used against the
    private Stream API to fetch metadata, formats and text tracks.
    """
    IE_NAME = 'microsoftstream'
    IE_DESC = 'Microsoft Stream'
    _VALID_URL = r'https?://(?:web|www|msit)\.microsoftstream\.com/video/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'

    _TESTS = [{
        'url': 'https://web.microsoftstream.com/video/6e51d928-4f46-4f1c-b141-369925e37b62?list=user&userId=f5491e02-e8fe-4e34-b67c-ec2e79a6ecc0',
        'only_matching': True,
    }, {
        'url': 'https://msit.microsoftstream.com/video/b60f5987-aabd-4e1c-a42f-c559d138f2ca',
        'only_matching': True,
    }]

    def _get_all_subtitles(self, api_url, video_id, headers):
        """Fetch all text tracks for the video and split them into manual
        subtitles and auto-generated captions, keyed by language code.

        Returns a dict with ``subtitles`` and ``automatic_captions`` keys
        suitable for merging into the info dict.
        """
        subtitles = {}
        automatic_captions = {}
        # FIX: with fatal=False, _download_json may return a falsy value
        # (None/False) on request failure; guard with `or {}` so .get()
        # does not raise AttributeError and we degrade to "no subtitles".
        text_tracks = (self._download_json(
            f'{api_url}/videos/{video_id}/texttracks', video_id,
            note='Downloading subtitles JSON', fatal=False, headers=headers,
            query={'api-version': '1.4-private'}) or {}).get('value') or []
        for track in text_tracks:
            # Skip tracks that lack either a language tag or a URL.
            if not track.get('language') or not track.get('url'):
                continue
            sub_dict = automatic_captions if track.get('autoGenerated') else subtitles
            sub_dict.setdefault(track['language'], []).append({
                'ext': 'vtt',
                'url': track['url'],  # guaranteed truthy by the guard above
            })
        return {
            'subtitles': subtitles,
            'automatic_captions': automatic_captions,
        }

    def extract_all_subtitles(self, *args, **kwargs):
        """Download subtitles only when the user actually asked for them,
        avoiding an extra API request otherwise."""
        if (self.get_param('writesubtitles', False)
                or self.get_param('writeautomaticsub', False)
                or self.get_param('listsubtitles')):
            return self._get_all_subtitles(*args, **kwargs)
        return {}

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # The logged-in player page carries this exact <title>; its absence
        # means we got the anonymous/login page instead.
        if '<title>Microsoft Stream</title>' not in webpage:
            self.raise_login_required(method='cookies')

        access_token = self._html_search_regex(r'"AccessToken":"(.+?)"', webpage, 'access token')
        api_url = self._html_search_regex(r'"ApiGatewayUri":"(.+?)"', webpage, 'api url')

        headers = {'Authorization': f'Bearer {access_token}'}

        video_data = self._download_json(
            f'{api_url}/videos/{video_id}', video_id,
            headers=headers, query={
                '$expand': 'creator,tokens,status,liveEvent,extensions',
                'api-version': '1.4-private',
            })
        video_id = video_data.get('id') or video_id
        language = video_data.get('language')

        thumbnails = []
        for thumbnail_id in ('extraSmall', 'small', 'medium', 'large'):
            thumbnail_url = try_get(video_data, lambda x: x['posterImage'][thumbnail_id]['url'], str)
            if not thumbnail_url:
                continue
            thumb = {
                'id': thumbnail_id,
                'url': thumbnail_url,
            }
            # The URL basename is unpadded base64; re-pad and decode it, then
            # mine the decoded name for a WxH resolution hint. str() of the
            # bytes repr is sufficient for parse_resolution's regex and never
            # raises on non-UTF-8 payloads.
            thumb_name = url_basename(thumbnail_url)
            thumb_name = str(b64decode(thumb_name + '=' * (-len(thumb_name) % 4)))
            thumb.update(parse_resolution(thumb_name))
            thumbnails.append(thumb)

        formats = []
        for playlist in video_data['playbackUrls']:
            # FIX: use .get() so a malformed playlist entry (missing
            # mimeType/playbackUrl) is skipped instead of raising KeyError.
            playback_url = playlist.get('playbackUrl')
            if not playback_url:
                continue
            mime_type = playlist.get('mimeType')
            if mime_type == 'application/vnd.apple.mpegurl':
                formats.extend(self._extract_m3u8_formats(
                    playback_url, video_id,
                    ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls',
                    fatal=False, headers=headers))
            elif mime_type == 'application/dash+xml':
                formats.extend(self._extract_mpd_formats(
                    playback_url, video_id, mpd_id='dash',
                    fatal=False, headers=headers))
            elif mime_type == 'application/vnd.ms-sstr+xml':
                formats.extend(self._extract_ism_formats(
                    playback_url, video_id, ism_id='mss',
                    fatal=False, headers=headers))
        # Tag every format with the video's declared language.
        formats = [merge_dicts(f, {'language': language}) for f in formats]

        return {
            'id': video_id,
            'title': video_data['name'],
            'description': video_data.get('description'),
            'uploader': try_get(video_data, lambda x: x['creator']['name'], str),
            'uploader_id': try_get(video_data, (lambda x: x['creator']['mail'],
                                                lambda x: x['creator']['id']), str),
            'thumbnails': thumbnails,
            **self.extract_all_subtitles(api_url, video_id, headers),
            'timestamp': parse_iso8601(video_data.get('created')),
            'duration': parse_duration(try_get(video_data, lambda x: x['media']['duration'])),
            'webpage_url': f'https://web.microsoftstream.com/video/{video_id}',
            'view_count': try_get(video_data, lambda x: x['metrics']['views'], int),
            'like_count': try_get(video_data, lambda x: x['metrics']['likes'], int),
            'comment_count': try_get(video_data, lambda x: x['metrics']['comments'], int),
            'formats': formats,
        }