3 from .common
import InfoExtractor
14 class MicrosoftStreamIE(InfoExtractor
):
15 IE_NAME
= 'microsoftstream'
16 IE_DESC
= 'Microsoft Stream'
17 _VALID_URL
= r
'https?://(?:web|www|msit)\.microsoftstream\.com/video/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
20 'url': 'https://web.microsoftstream.com/video/6e51d928-4f46-4f1c-b141-369925e37b62?list=user&userId=f5491e02-e8fe-4e34-b67c-ec2e79a6ecc0',
21 'only_matching': True,
23 'url': 'https://msit.microsoftstream.com/video/b60f5987-aabd-4e1c-a42f-c559d138f2ca',
24 'only_matching': True,
def _get_all_subtitles(self, api_url, video_id, headers):
    """Fetch the video's text tracks and sort them into subtitle buckets.

    Args:
        api_url: Base URL of the API; ``/videos/{video_id}/texttracks`` is
            appended to it.
        video_id: Video identifier, used in the request path and handed to
            the downloader.
        headers: HTTP headers forwarded with the request.

    Returns:
        A dict with the keys ``'subtitles'`` and ``'automatic_captions'``.
        Each maps a track language to a list of ``{'url': ...}`` entries.
        Both mappings are empty when the request yields no usable tracks.
    """
    subtitles = {}
    automatic_captions = {}

    # The request is made with fatal=False, so the downloader presumably
    # returns a falsy value instead of raising on failure (confirm against
    # the base class). Fall back to an empty mapping before calling .get().
    response = self._download_json(
        f'{api_url}/videos/{video_id}/texttracks', video_id,
        note='Downloading subtitles JSON', fatal=False, headers=headers,
        query={'api-version': '1.4-private'}) or {}

    for track in response.get('value') or []:
        language = track.get('language')
        track_url = track.get('url')
        # A track is only usable when both the language and the URL are set.
        if not language or not track_url:
            continue
        bucket = automatic_captions if track.get('autoGenerated') else subtitles
        bucket.setdefault(language, []).append({
            'url': track_url,
        })

    return {
        'subtitles': subtitles,
        'automatic_captions': automatic_captions,
    }
def extract_all_subtitles(self, *args, **kwargs):
    """Return subtitle data only when a subtitle-related option is enabled.

    The options checked are ``writesubtitles``, ``writeautomaticsub`` and
    ``listsubtitles``. When any of them is truthy, all arguments are passed
    through unchanged to ``_get_all_subtitles`` and its result is returned.

    Returns:
        The result of ``_get_all_subtitles`` when subtitles were requested,
        otherwise an empty dict.
    """
    requested = (
        self.get_param('writesubtitles', False)
        or self.get_param('writeautomaticsub', False)
        or self.get_param('listsubtitles'))
    if requested:
        return self._get_all_subtitles(*args, **kwargs)
    # The caller unpacks this result with ``**`` into its info dict, so the
    # "nothing requested" case must be an empty mapping rather than None.
    return {}
54 def _real_extract(self
, url
):
55 video_id
= self
._match
_id
(url
)
56 webpage
= self
._download
_webpage
(url
, video_id
)
57 if '<title>Microsoft Stream</title>' not in webpage
:
58 self
.raise_login_required(method
='cookies')
60 access_token
= self
._html
_search
_regex
(r
'"AccessToken":"(.+?)"', webpage
, 'access token')
61 api_url
= self
._html
_search
_regex
(r
'"ApiGatewayUri":"(.+?)"', webpage
, 'api url')
63 headers
= {'Authorization': f'Bearer {access_token}
'}
65 video_data = self._download_json(
66 f'{api_url}
/videos
/{video_id}
', video_id,
67 headers=headers, query={
68 '$expand
': 'creator
,tokens
,status
,liveEvent
,extensions
',
69 'api
-version
': '1.4-private
'
71 video_id = video_data.get('id') or video_id
72 language = video_data.get('language
')
75 for thumbnail_id in ('extraSmall
', 'small
', 'medium
', 'large
'):
76 thumbnail_url = try_get(video_data, lambda x: x['posterImage
'][thumbnail_id]['url
'], str)
83 thumb_name = url_basename(thumbnail_url)
84 thumb_name = str(base64.b64decode(thumb_name + '=' * (-len(thumb_name) % 4)))
85 thumb.update(parse_resolution(thumb_name))
86 thumbnails.append(thumb)
89 for playlist in video_data['playbackUrls
']:
90 if playlist['mimeType
'] == 'application
/vnd
.apple
.mpegurl
':
91 formats.extend(self._extract_m3u8_formats(
92 playlist['playbackUrl
'], video_id,
93 ext='mp4
', entry_protocol='m3u8_native
', m3u8_id='hls
',
94 fatal=False, headers=headers))
95 elif playlist['mimeType
'] == 'application
/dash
+xml
':
96 formats.extend(self._extract_mpd_formats(
97 playlist['playbackUrl
'], video_id, mpd_id='dash
',
98 fatal=False, headers=headers))
99 elif playlist['mimeType
'] == 'application
/vnd
.ms
-sstr
+xml
':
100 formats.extend(self._extract_ism_formats(
101 playlist['playbackUrl
'], video_id, ism_id='mss
',
102 fatal=False, headers=headers))
103 formats = [merge_dicts(f, {'language': language}) for f in formats]
107 'title
': video_data['name
'],
108 'description
': video_data.get('description
'),
109 'uploader
': try_get(video_data, lambda x: x['creator
']['name
'], str),
110 'uploader_id
': try_get(video_data, (lambda x: x['creator
']['mail
'],
111 lambda x: x['creator
']['id']), str),
112 'thumbnails
': thumbnails,
113 **self.extract_all_subtitles(api_url, video_id, headers),
114 'timestamp
': parse_iso8601(video_data.get('created
')),
115 'duration
': parse_duration(try_get(video_data, lambda x: x['media
']['duration
'])),
116 'webpage_url
': f'https
://web
.microsoftstream
.com
/video
/{video_id}
',
117 'view_count
': try_get(video_data, lambda x: x['metrics
']['views
'], int),
118 'like_count
': try_get(video_data, lambda x: x['metrics
']['likes
'], int),
119 'comment_count
': try_get(video_data, lambda x: x['metrics
']['comments
'], int),