]>
Commit | Line | Data |
---|---|---|
9c634ef8 | 1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
9c634ef8 | 4 | from ..utils import ( |
5 | int_or_none, | |
6 | js_to_json, | |
7 | parse_duration, | |
8 | traverse_obj, | |
9 | try_get, | |
e897bd82 | 10 | urljoin, |
9c634ef8 | 11 | ) |
12 | ||
13 | ||
class MainStreamingIE(InfoExtractor):
    """Extractor for MainStreaming player pages served from ``*.msvdn.net``.

    Supports the ``embed``, ``amp_embed`` and ``content`` URL forms. The
    content type reported by the API decides how an item is handled:
    ``10`` is a regular video, ``20`` a live stream and ``31`` a playlist.
    """

    _VALID_URL = r'https?://(?:webtools-?)?(?P<host>[A-Za-z0-9-]*\.msvdn\.net)/(?:embed|amp_embed|content)/(?P<id>\w+)'
    _EMBED_REGEX = [rf'<iframe[^>]+?src=["\']?(?P<url>{_VALID_URL})["\']?']
    IE_DESC = 'MainStreaming Player'

    _TESTS = [
        {
            # Live stream offline, has alternative content id
            'url': 'https://webtools-e18da6642b684f8aa9ae449862783a56.msvdn.net/embed/53EN6GxbWaJC',
            'info_dict': {
                'id': '53EN6GxbWaJC',
                'title': 'Diretta homepage 2021-12-31 12:00',
                'description': '',
                'live_status': 'was_live',
                'ext': 'mp4',
                'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
            },
            'expected_warnings': [
                'Ignoring alternative content ID: WDAF1KOWUpH3',
                'MainStreaming said: Live event is OFFLINE',
            ],
            'skip': 'live stream offline',
        }, {
            # playlist
            'url': 'https://webtools-e18da6642b684f8aa9ae449862783a56.msvdn.net/embed/WDAF1KOWUpH3',
            'info_dict': {
                'id': 'WDAF1KOWUpH3',
                'title': 'Playlist homepage',
            },
            'playlist_mincount': 2,
        }, {
            # livestream
            'url': 'https://webtools-859c1818ed614cc5b0047439470927b0.msvdn.net/embed/tDoFkZD3T1Lw',
            'info_dict': {
                'id': 'tDoFkZD3T1Lw',
                'title': r're:Class CNBC Live \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
                'live_status': 'is_live',
                'ext': 'mp4',
                'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
            },
            'skip': 'live stream',
        }, {
            'url': 'https://webtools-f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/EUlZfGWkGpOd?autoPlay=false',
            'info_dict': {
                'id': 'EUlZfGWkGpOd',
                'title': 'La Settimana ',
                'description': '03 Ottobre ore 02:00',
                'ext': 'mp4',
                'live_status': 'not_live',
                'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
                'duration': 1512,
            },
        }, {
            # video without webtools- prefix
            'url': 'https://f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/MfuWmzL2lGkA?autoplay=false&T=1635860445',
            'info_dict': {
                'id': 'MfuWmzL2lGkA',
                'title': 'TG Mattina',
                'description': '06 Ottobre ore 08:00',
                'ext': 'mp4',
                'live_status': 'not_live',
                'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
                'duration': 789.04,
            },
        }, {
            # always-on livestream with DVR
            'url': 'https://webtools-f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/HVvPMzy',
            'info_dict': {
                'id': 'HVvPMzy',
                'title': r're:^Diretta LaC News24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
                'description': 'canale all news',
                'live_status': 'is_live',
                'ext': 'mp4',
                'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
            },
            'params': {
                'skip_download': True,
            },
        }, {
            # no host
            'url': 'https://webtools.msvdn.net/embed/MfuWmzL2lGkA',
            'only_matching': True,
        }, {
            'url': 'https://859c1818ed614cc5b0047439470927b0.msvdn.net/amp_embed/tDoFkZD3T1Lw',
            'only_matching': True,
        }, {
            'url': 'https://859c1818ed614cc5b0047439470927b0.msvdn.net/content/tDoFkZD3T1Lw#',
            'only_matching': True,
        },
    ]

    def _playlist_entries(self, host, playlist_content):
        """Yield one ``url``-type result per playlist item.

        ``host`` is used to build each item's embed URL. ``playlist_content``
        is the list of item dicts taken from the content info; ``None`` (the
        key was absent) is treated as an empty playlist.
        """
        for entry in playlist_content or []:
            content_id = entry.get('contentID')
            if not content_id:
                # Without an ID the embed URL would end in ".../embed/None"
                continue
            yield {
                '_type': 'url',
                'ie_key': MainStreamingIE.ie_key(),
                'id': content_id,
                'duration': int_or_none(traverse_obj(entry, ('duration', 'totalSeconds'))),
                'title': entry.get('title'),
                'url': f'https://{host}/embed/{content_id}',
            }

    @staticmethod
    def _get_webtools_host(host):
        """Return ``host`` with a ``webtools`` prefix if it lacks one.

        A host that starts with ``.`` (no sub-domain part) becomes
        ``webtools<host>``; any other host becomes ``webtools-<host>``.
        """
        if not host.startswith('webtools'):
            host = 'webtools' + ('-' if not host.startswith('.') else '') + host
        return host

    def _get_webtools_base_url(self, host):
        """Return the scheme-qualified base URL of the webtools host for ``host``."""
        return f'{self.http_scheme()}//{self._get_webtools_host(host)}'

    def _call_api(self, host: str, path: str, item_id: str, query=None, note='Downloading API JSON', fatal=False):
        """Request ``/api/v2/<path>`` on the webtools host; non-fatal by default."""
        # JSON API, does not appear to be documented
        return self._call_webtools_api(host, '/api/v2/' + path, item_id, query, note, fatal)

    def _call_webtools_api(self, host: str, path: str, item_id: str, query=None, note='Downloading webtools API JSON', fatal=False):
        """Download JSON from ``path`` on the webtools host; non-fatal by default."""
        # webtools docs: https://webtools.msvdn.net/
        return self._download_json(
            urljoin(self._get_webtools_base_url(host), path), item_id, query=query, note=note, fatal=fatal)

    def _fetch_content_info(self, url, host, video_id):
        """Return the content info dict for ``video_id``, or ``{}`` if unavailable.

        The JSON API is tried first. If it yields nothing, the player config
        embedded in the page at ``url`` is parsed instead.
        """
        content_info = try_get(
            self._call_api(
                host, f'content/{video_id}', video_id, note='Downloading content info API JSON'),
            lambda x: x['playerContentInfo'])
        if content_info:
            return content_info

        # Fallback
        webpage = self._download_webpage(url, video_id)
        player_config = self._parse_json(
            self._search_regex(
                r'config\s*=\s*({.+?})\s*;', webpage, 'mainstreaming player config',
                default='{}', flags=re.DOTALL),
            video_id, transform_source=js_to_json, fatal=False) or {}
        # The config may be empty (regex miss / parse failure), so do not index it
        return player_config.get('contentInfo') or {}

    def _real_extract(self, url):
        """Extract a video, live stream or playlist from a MainStreaming URL."""
        host, video_id = self._match_valid_url(url).groups()
        content_info = self._fetch_content_info(url, host, video_id)

        host = content_info.get('host') or host
        video_id = content_info.get('contentID') or video_id
        title = content_info.get('title')
        description = traverse_obj(content_info, 'longDescription', 'shortDescription', expected_type=str)
        live_status = 'not_live'
        if content_info.get('drmEnabled'):
            self.report_drm(video_id)

        alternative_content_id = content_info.get('alternativeContentID')
        if alternative_content_id:
            self.report_warning(f'Ignoring alternative content ID: {alternative_content_id}')

        content_type = int_or_none(content_info.get('contentType'))
        format_base_url = None
        formats = []
        subtitles = {}
        # Live content
        if content_type == 20:
            dvr_enabled = traverse_obj(content_info, ('playerSettings', 'dvrEnabled'), expected_type=bool)
            # NOTE: a live item without 'liveSourceID' still raises KeyError here
            format_base_url = f"https://{host}/live/{content_info['liveSourceID']}/{video_id}/%s{'?DVR' if dvr_enabled else ''}"
            live_status = 'is_live'
            heartbeat = self._call_api(host, f'heartbeat/{video_id}', video_id, note='Checking stream status') or {}
            if heartbeat.get('heartBeatUp') is False:
                self.raise_no_formats(f'MainStreaming said: {heartbeat.get("responseMessage")}', expected=True)
                live_status = 'was_live'

        # Playlist
        elif content_type == 31:
            return self.playlist_result(
                self._playlist_entries(host, content_info.get('playlistContents')), video_id, title, description)
        # Normal video content?
        elif content_type == 10:
            format_base_url = f'https://{host}/vod/{video_id}/%s'
            # Progressive format
            # Note: in https://webtools.msvdn.net/loader/playerV2.js there is mention of original.mp3 format,
            # however it seems to be the same as original.mp4?
            formats.append({'url': format_base_url % 'original.mp4', 'format_note': 'original', 'quality': 1})
        elif not content_info:
            # Neither the API nor the page fallback produced any content info
            self.raise_no_formats('Unable to extract content info')
        else:
            self.raise_no_formats(f'Unknown content type {content_type}')

        if format_base_url:
            m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                format_base_url % 'playlist.m3u8', video_id=video_id, fatal=False)
            mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(
                format_base_url % 'manifest.mpd', video_id=video_id, fatal=False)

            subtitles = self._merge_subtitles(m3u8_subs, mpd_subs)
            formats.extend(m3u8_formats + mpd_formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'live_status': live_status,
            'duration': parse_duration(content_info.get('duration')),
            'tags': content_info.get('tags'),
            'subtitles': subtitles,
            'thumbnail': urljoin(self._get_webtools_base_url(host), f'image/{video_id}/poster'),
        }